def train(self, graphs):
    """
    Trains a C{NaiveBayesClassifier} using the edges present in the
    graphs list as positive examples and the edges not present as
    negative examples.  Uses a feature vector of head-word, head-tag,
    child-word, and child-tag.

    @type graphs: A list of C{DependencyGraph}
    @param graphs: A list of dependency graphs to train the scorer.
    """
    # Create labeled training examples: every (head, child) pair in a
    # graph is labeled "T" if the edge exists and "F" otherwise.
    labeled_examples = []
    for graph in graphs:
        for head_node in graph.nodelist:
            for child_index in range(len(graph.nodelist)):
                child_node = graph.get_by_address(child_index)
                if child_index in head_node['deps']:
                    label = "T"
                else:
                    label = "F"
                labeled_examples.append(
                    (dict(a=head_node['word'],
                          b=head_node['tag'],
                          c=child_node['word'],
                          d=child_node['tag']),
                     label))

    # Train the classifier
    import nltk
    nltk.usage(nltk.ClassifierI)
    self.classifier = nltk.classify.NaiveBayesClassifier.train(labeled_examples)
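# A minimal usage sketch of the classifier call this method ends with,
# using hand-made feature dicts in place of real graph edges.  The data
# below is illustrative only, not from the original training graphs.
import nltk

toy_examples = [
    (dict(a='saw', b='VBD', c='dog', d='NN'), 'T'),  # an actual head->child edge
    (dict(a='dog', b='NN', c='saw', d='VBD'), 'F'),  # not an edge
]
toy_classifier = nltk.classify.NaiveBayesClassifier.train(toy_examples)
# Classify a pair the toy model has never seen.
print toy_classifier.classify(dict(a='saw', b='VBD', c='cat', d='NN'))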
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA.

import nltk
nltk.usage(nltk.classify.ClassifierI)

from pprint import pprint

train = [
    (dict(a=1, b=1, c=1), 'y'),
    (dict(a=1, b=1, c=1), 'x'),
    (dict(a=1, b=1, c=0), 'y'),
    (dict(a=0, b=1, c=1), 'x'),
    (dict(a=0, b=1, c=1), 'y'),
    (dict(a=0, b=0, c=1), 'y'),
    (dict(a=0, b=1, c=0), 'x'),
    (dict(a=0, b=0, c=0), 'x'),
    (dict(a=0, b=1, c=1), 'y'),
]
test = [
    dict(a=1, b=0, c=1),  # unseen
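# A hedged sketch of where this demo presumably goes next (the `test`
# list above breaks off after its first item in the source): train a
# NaiveBayesClassifier on `train` and inspect the label probabilities
# for one unseen feature set.
classifier = nltk.classify.NaiveBayesClassifier.train(train)
pdist = classifier.prob_classify(dict(a=1, b=0, c=1))
print 'x: %.4f  y: %.4f' % (pdist.prob('x'), pdist.prob('y'))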
import nltk
#from nltk.classify.naivebayes import NaiveBayesClassifier

nltk.usage(nltk.classify.ClassifierI)

# Collect the tag inventory: column 2 of each non-empty line is
# appended to the seed list of chunk tags.
l = ['NP', 'VGF', 'BLK', 'CCP', 'JJP', 'VGNN']
f = open('small_train')
for i in f:
    line = i.split()
    if line:
        l.append(line[1])
l1 = []
f.close()
d = {}

# Build (feature-dict, label) pairs: feature 'a' is column 2; the label
# is the prefix of column 1 before the '-'.
f1 = open('small_train')
for i in f1:
    line = i.split()
    d = {}
    # for j in l:
    #     d[j] = 0
    if line:
        line1 = line[0].split('-')
        d['a'] = line[1]
        l1.append((d, line1[0]))
f1.close()

f2 = open('small_train')
l2 = []
for i in f2:
    line = i.split()
    d = {}
    # for j in l:
    #     d[j] = 0
    if line:
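# A hedged sketch of the training step this script appears to be building
# toward (assumption: l1 is the finished list of (feature-dict, label)
# pairs; the snippet above is cut off mid-loop in the source):
classifier = nltk.classify.NaiveBayesClassifier.train(l1)
print classifier.labels()
classifier.show_most_informative_features(5)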
        globalWords[str(j[0])] += j[1]
    else:
        globalWords[str(j[0])] = j[1]
print globalWords"""

# Classifier tester
import sys  # needed for sys.argv below
import numpy
import scipy
import nltk

print 'NumPy Version: ', numpy.__version__
print 'SciPy Version: ', scipy.__version__
print 'NLTK Version: ', nltk.__version__
print nltk.usage(nltk.ClassifierI)

# Training & test data
arch = sys.argv[1]
fi = open(arch, 'r')
content = fi.readlines()
globalWords = {}
countINF, countNAV, countRES = 0.0, 0.0, 0.0
querys, category = [], []

for i in content:
    data = i.split('\t')
    if data[0] == "INF":
        countINF += 1
    elif data[0] == "NAV":
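# A hedged sketch of the category priors one might print from these
# counters (assumptions: the elif chain above, cut off in the source,
# continues symmetrically with countNAV += 1 and an "RES" branch, and
# every line carries one of the three labels):
total = countINF + countNAV + countRES
print 'P(INF)=%.3f  P(NAV)=%.3f  P(RES)=%.3f' % (
    countINF / total, countNAV / total, countRES / total)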