def train(self, graphs):
    """
    Trains a C{NaiveBayesClassifier} using the edges present in the
    graphs list as positive examples and the edges not present as
    negative examples.  Uses a feature vector of head-word, head-tag,
    child-word, and child-tag.

    @type graphs: A list of C{DependencyGraph}
    @param graphs: A list of dependency graphs to train the scorer.
    """
    # Create labeled training examples: every (head, child) pair in a
    # graph is labeled "T" if the edge exists and "F" otherwise.
    labeled_examples = []
    for graph in graphs:
        for head_node in graph.nodelist:
            for child_index in range(len(graph.nodelist)):
                child_node = graph.get_by_address(child_index)
                if child_index in head_node['deps']:
                    label = "T"
                else:
                    label = "F"
                labeled_examples.append(
                    (dict(a=head_node['word'],
                          b=head_node['tag'],
                          c=child_node['word'],
                          d=child_node['tag']),
                     label))

    # Train the classifier
    import nltk
    nltk.usage(nltk.ClassifierI)
    self.classifier = nltk.classify.NaiveBayesClassifier.train(labeled_examples)
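# A minimal usage sketch of the classifier call this method ends with,
# using hand-made feature dicts in place of real graph edges.  The data
# below is illustrative only, not from the original training graphs.
import nltk

toy_examples = [
    (dict(a='saw', b='VBD', c='dog', d='NN'), 'T'),  # an actual head->child edge
    (dict(a='dog', b='NN', c='saw', d='VBD'), 'F'),  # not an edge
]
toy_classifier = nltk.classify.NaiveBayesClassifier.train(toy_examples)
# Classify a pair the toy model has never seen.
print toy_classifier.classify(dict(a='saw', b='VBD', c='cat', d='NN'))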
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA.

import nltk
nltk.usage(nltk.classify.ClassifierI)

from pprint import pprint

train = [
    (dict(a=1, b=1, c=1), 'y'),
    (dict(a=1, b=1, c=1), 'x'),
    (dict(a=1, b=1, c=0), 'y'),
    (dict(a=0, b=1, c=1), 'x'),
    (dict(a=0, b=1, c=1), 'y'),
    (dict(a=0, b=0, c=1), 'y'),
    (dict(a=0, b=1, c=0), 'x'),
    (dict(a=0, b=0, c=0), 'x'),
    (dict(a=0, b=1, c=1), 'y'),
]
test = [
    dict(a=1, b=0, c=1),  # unseen
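# A hedged sketch of where this demo presumably goes next (the `test`
# list above breaks off after its first item in the source): train a
# NaiveBayesClassifier on `train` and inspect the label probabilities
# for one unseen feature set.
classifier = nltk.classify.NaiveBayesClassifier.train(train)
pdist = classifier.prob_classify(dict(a=1, b=0, c=1))
print 'x: %.4f  y: %.4f' % (pdist.prob('x'), pdist.prob('y'))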
import nltk
#from nltk.classify.naivebayes import NaiveBayesClassifier

nltk.usage(nltk.classify.ClassifierI)

# Collect the tag inventory: column 2 of each non-empty line is
# appended to the seed list of chunk tags.
l = ['NP', 'VGF', 'BLK', 'CCP', 'JJP', 'VGNN']
f = open('small_train')
for i in f:
    line = i.split()
    if line:
        l.append(line[1])
l1 = []
f.close()
d = {}

# Build (feature-dict, label) pairs: feature 'a' is column 2; the label
# is the prefix of column 1 before the '-'.
f1 = open('small_train')
for i in f1:
    line = i.split()
    d = {}
    # for j in l:
    #     d[j] = 0
    if line:
        line1 = line[0].split('-')
        d['a'] = line[1]
        l1.append((d, line1[0]))
f1.close()

f2 = open('small_train')
l2 = []
for i in f2:
    line = i.split()
    d = {}
    # for j in l:
    #     d[j] = 0
    if line:
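# A hedged sketch of the training step this script appears to be building
# toward (assumption: l1 is the finished list of (feature-dict, label)
# pairs; the snippet above is cut off mid-loop in the source):
classifier = nltk.classify.NaiveBayesClassifier.train(l1)
print classifier.labels()
classifier.show_most_informative_features(5)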
        globalWords[str(j[0])] += j[1]
    else:
        globalWords[str(j[0])] = j[1]
print globalWords"""

# Classifier tester
import sys  # needed for sys.argv below
import numpy
import scipy
import nltk

print 'NumPy Version: ', numpy.__version__
print 'SciPy Version: ', scipy.__version__
print 'NLTK Version: ', nltk.__version__
print nltk.usage(nltk.ClassifierI)

# Training & test data
arch = sys.argv[1]
fi = open(arch, 'r')
content = fi.readlines()
globalWords = {}
countINF, countNAV, countRES = 0.0, 0.0, 0.0
querys, category = [], []

for i in content:
    data = i.split('\t')
    if data[0] == "INF":
        countINF += 1
    elif data[0] == "NAV":
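# A hedged sketch of the category priors one might print from these
# counters (assumptions: the elif chain above, cut off in the source,
# continues symmetrically with countNAV += 1 and an "RES" branch, and
# every line carries one of the three labels):
total = countINF + countNAV + countRES
print 'P(INF)=%.3f  P(NAV)=%.3f  P(RES)=%.3f' % (
    countINF / total, countNAV / total, countRES / total)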