def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Initialize the id sets, speculation word lists, gazetteer and styles.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(1) is created when None
    featureSet -- feature id set; a new IdSet() is created when None
    gazetteerFileName -- optional path passed to Gazetteer.loadGztr();
                         when None, self.gazetteer is set to None
    """
    # Compare against the None singleton by identity (PEP 8), so a custom
    # __eq__ on the argument can never influence the default handling.
    if classSet is None:
        classSet = IdSet(1)
    # The "neg" class is required to have id 1.
    assert classSet.getId("neg") == 1
    if featureSet is None:
        featureSet = IdSet()

    # speculationWords is a module-level name that is only read here, so no
    # "global" declaration is needed.
    self.specWords, self.specWordStems = readWords(speculationWords)

    ExampleBuilder.__init__(self, classSet, featureSet)
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        self.gazetteer = None

    # First dict: default style values. Second dict: presumably the values
    # allowed for "classification" -- confirm against _setDefaultParameters.
    self._setDefaultParameters(
        {"classification": "multiclass", "speculation_words": True},
        {"classification": ("multiclass", "speculation", "negation")})
    self.styles = self.getParameters(style)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Initialize the id sets, styles, optional word vector indices and the
    empty matrix/token containers.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(0) is created when None
    featureSet -- feature id set; a new IdSet(0) is created when None
    gazetteerFileName -- accepted but not used by this constructor
    skiplist -- accepted but not used by this constructor
    """
    # Compare against the None singleton by identity (PEP 8).
    if classSet is None:
        classSet = IdSet(0)
    if featureSet is None:
        featureSet = IdSet(0)

    ExampleBuilder.__init__(self, classSet, featureSet)
    # Expose the instance's feature/class id sets under additional names.
    self.featureIds = self.featureSet
    self.labelIds = self.classSet

    self._setDefaultParameters([
        "directed", "undirected", "cutoff", "annotated_only", "all_positive",
        "wv", "epochs", "html", "autoencode", "lr", "patience"])
    self.styles = self.getParameters(style)
    # Normalize a truthy cutoff value to an integer.
    if self.styles["cutoff"]:
        self.styles["cutoff"] = int(self.styles["cutoff"])

    self.wvIndices = None
    self.embeddingMatrices = None
    # Look the style up once instead of twice.
    wvName = self.styles.get("wv")
    if wvName is not None:
        indexPath = wvName + "-indices.json.gz"
        # Fall back to the "wv" directory under Settings.DATAPATH when the
        # name does not resolve to an existing file as given.
        if not os.path.exists(indexPath):
            indexPath = os.path.join(Settings.DATAPATH, "wv", indexPath)
        print >> sys.stderr, "Loading word vector indices from", indexPath
        with gzip.open(indexPath, "rt") as f:
            self.wvIndices = json.load(f)["indices"]
        # NOTE(review): nesting reconstructed from a flattened source line;
        # assumed to belong to the "wv" branch, since an unconditional
        # assignment would make the None default above dead code -- confirm.
        self.embeddingMatrices = []

    self.dimMatrix = 32
    self.rangeMatrix = range(self.dimMatrix)
    self.featureMatrices = []
    self.labelMatrices = []
    self.tokenLists = []
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Initialize the id sets, gazetteer, styles, skiplist and the optional
    feature builders selected by the styles.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(1) is created when None
    featureSet -- feature id set; a new IdSet() is created when None
    gazetteerFileName -- optional path passed to Gazetteer.loadGztr();
                         when None, self.gazetteer is set to None
    skiplist -- optional path to a text file; every line, stripped of
                surrounding whitespace, is added to self.skiplist
    """
    # Compare against the None singleton by identity (PEP 8).
    if classSet is None:
        classSet = IdSet(1)
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    # The "neg" class is required to have id 1.
    assert classSet.getId("neg") == 1

    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self._setDefaultParameters([
        "rel_features", "wordnet", "bb_features", "giuliano",
        "epi_merge_negated", "limit_merged_types", "genia_task1", "names",
        "build_for_nameless", "skip_for_nameless", "pos_only", "all_tokens",
        "pos_pairs", "linear_ngrams", "phospho", "drugbank_features",
        "ddi13_features", "metamap", "only_types", "ontobiotope_features",
        "bb_spans", "w2v", "no_context"])
    self.styles = self.getParameters(style)

    self.skiplist = set()
    if skiplist is not None:
        # The context manager closes the file even if reading fails; the
        # previous open()/close() pair leaked the handle on error.
        with open(skiplist, "rt") as f:
            for line in f:
                self.skiplist.add(line.strip())

    # Optional feature builders are only constructed for enabled styles.
    if self.styles["rel_features"]:
        self.relFeatureBuilder = RELFeatureBuilder(featureSet)
    if self.styles["wordnet"]:
        self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
    if self.styles["bb_features"]:
        self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens()
    if self.styles["giuliano"]:
        self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
    if self.styles["drugbank_features"]:
        self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
    if self.styles["ontobiotope_features"]:
        self.ontobiotopeFeatureBuilder = OntoBiotopeFeatureBuilder(self.featureSet)
    if self.styles["w2v"]:
        self.wordVectorFeatureBuilder = WordVectorFeatureBuilder(featureSet)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Initialize the id sets, gazetteer, styles, skiplist and the optional
    feature builders selected by the styles.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(1) is created when None
    featureSet -- feature id set; a new IdSet() is created when None
    gazetteerFileName -- optional path passed to Gazetteer.loadGztr();
                         when None, self.gazetteer is set to None
    skiplist -- optional path to a text file; every line, stripped of
                surrounding whitespace, is added to self.skiplist
    """
    # Compare against the None singleton by identity (PEP 8).
    if classSet is None:
        classSet = IdSet(1)
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    # The "neg" class is required to have id 1.
    assert classSet.getId("neg") == 1

    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self._setDefaultParameters([
        "rel_features", "wordnet", "bb_features", "giuliano",
        "epi_merge_negated", "limit_merged_types", "genia_task1",
        "build_for_nameless", "pos_only", "all_tokens", "names",
        "pos_pairs", "linear_ngrams", "phospho"])
    self.styles = self.getParameters(style)

    self.skiplist = set()
    if skiplist is not None:
        # The context manager closes the file even if reading fails; the
        # previous open()/close() pair leaked the handle on error.
        with open(skiplist, "rt") as f:
            for line in f:
                self.skiplist.add(line.strip())

    # Optional feature builders are only constructed for enabled styles.
    if self.styles["rel_features"]:
        self.relFeatureBuilder = RELFeatureBuilder(featureSet)
    if self.styles["wordnet"]:
        self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
    if self.styles["bb_features"]:
        self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens()
    if self.styles["giuliano"]:
        self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Initialize the id sets, styles and the trigger feature builder.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(1) is created when None
    featureSet -- feature id set; a new IdSet() is created when None
    gazetteerFileName -- accepted but not used by this constructor
    """
    # Compare against the None singleton by identity (PEP 8).
    if classSet is None:
        classSet = IdSet(1)
    # The "neg" class is required to have id 1.
    assert classSet.getId("neg") == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    self._setDefaultParameters(["co_limits"])
    self.styles = self.getParameters(style)

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    self.triggerFeatureBuilder.useNonNameEntities = False
    self.phraseTypeCounts = {}
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Initialize the id sets, styles and the trigger feature builder.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(1) is created when None
    featureSet -- feature id set; a new IdSet() is created when None
    gazetteerFileName -- accepted but not used by this constructor
    """
    # Identity test instead of "== None" (PEP 8).
    if classSet is None:
        classSet = IdSet(1)
    # The "neg" class is required to have id 1.
    assert classSet.getId("neg") == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    self._setDefaultParameters(["co_limits"])
    self.styles = self.getParameters(style)

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    self.triggerFeatureBuilder.useNonNameEntities = False
    self.phraseTypeCounts = {}
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
    """
    Initialize the id sets, speculation word lists, gazetteer and styles.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(1) is created when None
    featureSet -- feature id set; a new IdSet() is created when None
    gazetteerFileName -- optional path passed to Gazetteer.loadGztr();
                         when None, self.gazetteer is set to None
    """
    # Identity tests instead of "== None" / "!= None" (PEP 8).
    if classSet is None:
        classSet = IdSet(1)
    # The "neg" class is required to have id 1.
    assert classSet.getId("neg") == 1
    if featureSet is None:
        featureSet = IdSet()

    # The module-level speculationWords is only read, never rebound, so the
    # former "global" statement was redundant.
    self.specWords, self.specWordStems = readWords(speculationWords)

    ExampleBuilder.__init__(self, classSet, featureSet)
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        self.gazetteer = None

    # First dict: default style values. Second dict: presumably the values
    # allowed for "classification" -- confirm against _setDefaultParameters.
    self._setDefaultParameters(
        {"classification": "multiclass", "speculation_words": True},
        {"classification": ("multiclass", "speculation", "negation")})
    self.styles = self.getParameters(style)
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """
    Initialize the id sets, styles, optional word vector indices and the
    empty matrix/token containers.

    style -- style definition, passed to self.getParameters()
    classSet -- class id set; a new IdSet(0) is created when None
    featureSet -- feature id set; a new IdSet(0) is created when None
    gazetteerFileName -- accepted but not used by this constructor
    skiplist -- accepted but not used by this constructor
    """
    # Identity tests instead of "== None" / "!= None" (PEP 8).
    if classSet is None:
        classSet = IdSet(0)
    if featureSet is None:
        featureSet = IdSet(0)

    ExampleBuilder.__init__(self, classSet, featureSet)
    # Expose the instance's feature/class id sets under additional names.
    self.featureIds = self.featureSet
    self.labelIds = self.classSet

    self._setDefaultParameters([
        "directed", "undirected", "cutoff", "annotated_only", "all_positive",
        "wv", "epochs", "html", "autoencode", "lr", "patience"])
    self.styles = self.getParameters(style)
    # Normalize a truthy cutoff value to an integer.
    if self.styles["cutoff"]:
        self.styles["cutoff"] = int(self.styles["cutoff"])

    self.wvIndices = None
    self.embeddingMatrices = None
    # Bind the style value once rather than calling .get("wv") twice.
    wvName = self.styles.get("wv")
    if wvName is not None:
        indexPath = wvName + "-indices.json.gz"
        # Fall back to the "wv" directory under Settings.DATAPATH when the
        # name does not resolve to an existing file as given.
        if not os.path.exists(indexPath):
            indexPath = os.path.join(Settings.DATAPATH, "wv", indexPath)
        print >> sys.stderr, "Loading word vector indices from", indexPath
        with gzip.open(indexPath, "rt") as f:
            self.wvIndices = json.load(f)["indices"]
        # NOTE(review): nesting reconstructed from a flattened source line;
        # assumed to belong to the "wv" branch, since an unconditional
        # assignment would make the None default above dead code -- confirm.
        self.embeddingMatrices = []

    self.dimMatrix = 32
    self.rangeMatrix = range(self.dimMatrix)
    self.featureMatrices = []
    self.labelMatrices = []
    self.tokenLists = []