Esempi in Python per csv, esempi in Python per pattern.db.csv

Esempio n. 1

0

Mostra file

def resolve_certainty(certainty_info):
    '''Classify a certainty statement with a freshly trained Naive Bayes model.

    An empty string short-circuits to the literal 'No certainty info.'.
    Otherwise a new NB classifier is trained on every call from the
    (observation, certainty) rows of the training CSV, and the prediction
    for ``certainty_info`` is returned.
    '''
    if certainty_info == '':
        return 'No certainty info.'

    classifier = NB()
    training_rows = csv('library/templatetags/c_training_data.csv')
    for observation, certainty in training_rows:
        # The label is stored as text in the CSV, so cast it to int.
        classifier.train(
            Document(observation, type=int(certainty), stopwords=True))
    return classifier.classify(Document(certainty_info))

Esempio n. 2

0

Mostra file

File: askApatientengine.py Progetto: yedurag/Type-2-Diabetes-SideEffectsAnalysis

def csvGrab(fileName):
    """Load a CSV file, keep a fixed set of columns and normalise the date.

    The first row is treated as a header and skipped. From every remaining
    row the columns 1-7, 9 and 10 are kept, in that order. The value taken
    from column 9 is re-formatted from ``MM/DD/YYYY`` to ``YYYY-MM-DD``.

    Args:
        fileName: path handed unchanged to ``csv()``
            (presumably ``pattern.db.csv`` -- confirm against the imports).

    Returns:
        A list of 9-element lists, one per data row.

    Raises:
        IndexError: if a data row has fewer than 11 columns.
        ValueError: if the date column does not match ``%m/%d/%Y``.
    """
    kept_columns = (1, 2, 3, 4, 5, 6, 7, 9, 10)
    date_position = 7  # index of source column 9 inside the trimmed row

    rows = iter(csv(fileName))
    # Drop the header before doing any work on it; an empty file yields [].
    next(rows, None)

    result = []
    for row in rows:
        record = [row[i] for i in kept_columns]
        parsed = datetime.datetime.strptime(record[date_position], "%m/%d/%Y")
        record[date_position] = parsed.strftime('%Y-%m-%d')
        result.append(record)
    return result

Esempio n. 3

0

Mostra file

File: characterExtraction.py Progetto: emdaniels/character-extraction

def extractSentiment(characterSentences):
    """
    Trains a Naive Bayes classifier object with the reviews.csv file, analyzes
    each sentence, and returns the tones per character.

    characterSentences maps a key (the character) to an iterable of
    sentences. The result is a defaultdict(list) mapping the same key to the
    list of classifier predictions, one per sentence, in iteration order.
    Keys whose sentence iterable is empty do not appear in the result.
    """
    nb = NB()
    characterTones = defaultdict(list)
    for review, rating in csv("reviews.csv"):
        nb.train(Document(review, type=int(rating), stopwords=True))
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    for key, value in characterSentences.items():
        for x in value:
            characterTones[key].append(nb.classify(str(x)))
    return characterTones

Esempio n. 4

0

Mostra file

File: nlp.py Progetto: jogsdjf/NLP-Project

def extractSentiment(characterSentences):
    """
    Build a Naive Bayes classifier from the (review, rating) rows of
    reviews.csv, then classify every sentence of every character.

    Returns a defaultdict(list) keyed like characterSentences, holding one
    prediction per sentence.
    """
    classifier = NB()
    for text, score in csv("reviews.csv"):
        labelled = Document(text, type=int(score), stopwords=True)
        classifier.train(labelled)

    tones = defaultdict(list)
    for character, sentences in characterSentences.items():
        for sentence in sentences:
            prediction = classifier.classify(str(sentence))
            tones[character].append(prediction)
    return tones

Esempio n. 5

0

Mostra file

File: 08-wiktionary.py Progetto: DevKhokhar/pattern

from pattern.web import Wiktionary, DOM
from pattern.db import csv

# This example retrieves male and female given names from Wiktionary (http://en.wiktionary.org).
# It then trains a classifier that can predict the gender of unknown names (about 78% correct).
# The classifier is small (80KB) and fast.

w = Wiktionary(language="en")
f = csv() # csv() is a short alias for Datasheet().

# Collect male and female given names from Wiktionary.
# Store the data as (name, gender)-rows in a CSV-file.
# One appendix page is requested per gender and per initial letter, e.g.
# "Appendix:Male_given_names/A"; links that point to other appendix pages
# are skipped, every other link is stored as a name.

for gender in ("male", "female"):
    for ch in "abcdefghijklmnopqrstuvwxyz":
        p = w.search("Appendix:%s_given_names/%s" % (gender.capitalize(), ch.capitalize()), cached=True)
        for name in p.links:
            if not name.startswith("Appendix:"):
                # Only the first letter of the gender ("m" / "f") is stored.
                f.append((name, gender[0]))
        # The sheet is re-saved after every letter (presumably so partial
        # results survive an interrupted run).
        f.save("given-names.csv")
        # Single-argument print() emits the same "ch gender" text on
        # Python 2 and Python 3; the bare print statement fails on Python 3.
        print("%s %s" % (ch, gender))

# Create a classifier that predicts gender based on name.

from pattern.vector import SVM, chngrams, count, kfoldcv

class GenderByName(SVM):
    """SVM subclass that is trained on given names labelled with a gender."""

    def train(self, name, gender=None):
        """Train on one name.

        The name is converted with ``self.vector(name)`` and passed, together
        with ``gender`` as the label, to ``SVM.train``.

        NOTE(review): ``vector`` is not defined in the visible part of this
        class -- presumably it is defined further down; confirm.
        """
        SVM.train(self, self.vector(name), gender)

Esempio n. 6

0

Mostra file

File: pattern_vector.py Progetto: vishalbelsare/pattern_CLiPS

        for feature, w2 in m.lsa.concepts[concept].items():
            if w1 != 0 and w2 != 0:
                print(feature, w1 * w2)
# NOTE: every print below uses the single-argument call form print(x). It
# prints the same text on Python 2 and Python 3, whereas the bare
# "print x" statement is a syntax error on Python 3.

# clustering
# Three small documents are grouped into a model and clustered hierarchically.
d1 = Document('Cats are independent pets.', name='cat')
d2 = Document('Dogs are trustworthy pets.', name='dog')
d3 = Document('Boxes are made of cardboard.', name='box')
m = Model((d1, d2, d3))
print(m.cluster(method=HIERARCHICAL, k=2))
# hierarchical clustering
# A hand-built nested cluster, used to show depth and flatten().
cluster = Cluster((1, Cluster((2, Cluster((3, 4))))))
print(cluster.depth)
print(cluster.flatten(1))
# training a classifier
# Each CSV row is (review, rating); the rating text is cast to int and used
# as the document type (the class label).
nb = NB()
for review, rating in csv('data/input/reviews.csv'):
    v = Document(review, type=int(rating), stopwords=True)
    nb.train(v)
print(nb.classes)
print(nb.classify(Document('A good movie!')))
# testing a classifier
# The first 500 documents train a new classifier, the rest are used to test it.
data = csv('data/input/reviews.csv')
data = [(review, int(rating)) for review, rating in data]
data = [
    Document(review, type=rating, stopwords=True) for review, rating in data
]
nb = NB(train=data[:500])
accuracy, precision, recall, f1 = nb.test(data[500:])
print(accuracy)
# binary classification
data = csv('data/input/reviews.csv')

Esempio n. 7

0

Mostra file

File: 08-wiktionary.py Progetto: LiuFang816/SALSTM_py_data

import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.web import Wiktionary, DOM
from pattern.db import csv, pd

# This example retrieves male and female given names from Wiktionary (http://en.wiktionary.org).
# It then trains a classifier that can predict the gender of unknown names (about 78% correct).
# The classifier is small (80KB) and fast.

w = Wiktionary(language="en")
f = csv()  # csv() is a short alias for Datasheet().

# Gather given names for both genders from the Wiktionary appendix pages
# and keep them as (name, gender-initial) rows.
# pd() returns the parent directory of the current script, so
# pd("given-names.csv") = pattern/examples/01-web/given-names.csv.

for gender in ("male", "female"):
    appendix_gender = gender.capitalize()
    gender_initial = gender[0]
    for ch in "abcdefghijklmnopqrstuvwxyz":
        appendix_title = "Appendix:{}_given_names/{}".format(
            appendix_gender, ch.capitalize())
        p = w.search(appendix_title, cached=True)
        for name in p.links:
            # Links to other appendix pages are not names.
            if name.startswith("Appendix:"):
                continue
            f.append((name, gender_initial))
        # Write the sheet out after each letter and report progress.
        f.save(pd("given-names.csv"))
        print(ch, gender)

# Create a classifier that predicts gender based on name.

Esempio n. 8

0

Mostra file

File: sentiment.py Progetto: Jack53416/WebMining

import xml.etree.ElementTree as xmlTree
from pattern.vector import Document, NB, count, words
from pattern.web import plaintext
from pattern.db import csv
from collections import Counter

# Naive Bayes classifier plus two tallies: word frequencies over all posts
# and the number of posts per predicted opinion.
nb = NB()
wordStats = Counter()
opinionStats = Counter({'positive': 0, 'negative': 0, 'overall': 0})

# Train on tab-separated (grade, opinion) rows; the grade is the class label.
for grade, opinion in csv('trainData.csv', separator = '\t'):
    comment = Document(opinion, type=int(grade), stopwords = True)
    nb.train(comment)

tree = xmlTree.parse("Posts.xml")
root = tree.getroot()

for row in root:
    # Keep only alphabetic words (ignoring surrounding apostrophes) that are
    # longer than one character.
    doc = Document(plaintext(row.attrib['Body']),
                filter = lambda w: w.strip("'").isalpha() and len(w) > 1,
                stopwords = False)
    opinion = nb.classify(doc)
    opinionStats['overall'] += 1
    # Guard against a None prediction: `None > 0` is False on Python 2 but
    # raises TypeError on Python 3. A None prediction counts as negative,
    # which is what the comparison produced on Python 2.
    if opinion is not None and opinion > 0:
        opinionStats['positive'] += 1
    else:
        opinionStats['negative'] += 1
    # update() adds the counts in place instead of building a throwaway
    # Counter (and, on Python 2.7, a whole new accumulator) for every post.
    wordStats.update(doc.words)

# Single-argument print() behaves the same on Python 2 and Python 3.
print(wordStats.most_common(10))
print(opinionStats)

Esempio n. 9

0

Mostra file

File: fb_post_type.py Progetto: ckmarkoh/fb_nltk

# -*- coding: utf-8 -*-


from pattern.vector import Document, Model, TFIDF, SVM, kfoldcv,REGRESSION,RADIAL,CLASSIFICATION
from pattern.db import csv 
from sys import argv
import jieba
import json
import python_db

# extraversion, agreeable, conscientiousness, neuroticism, openness
# NOTE: the order of this list must match the order of the label columns
# unpacked from each corpus row below.
category = ['ext', 'agr', 'con', 'neu', 'ope']

# open the corpus file
data = csv('./csv/corpus.csv')

# create the document.vector
# data_doc maps each category to a list of Documents, one per corpus row,
# typed True when that row's label for the category equals 1.
data_doc = {cate: [] for cate in category}
for text, ext, agr, con, neu, ope in data:
    # Segment the text once per row instead of once per category.
    segmented = ' '.join(jieba.cut(text))
    for cate, label in zip(category, (ext, agr, con, neu, ope)):
        data_doc[cate].append(Document(segmented, type = int(label)==1))

# create the TFIDF model
# One TF-IDF weighted model per category.
m = {}
for cate in category:
    m[cate] = Model(documents = data_doc[cate], weight=TFIDF)

Esempio n. 10

0

Mostra file

File: FbClassifierTraining.py Progetto: yedurag/Type-2-Diabetes-SideEffectsAnalysis

#For training this classifier we need pattern, nltk (including the corpus), re and csv modules#

from pattern.vector import Model, Document, BINARY, SVM, kfoldcv, IG, SLP,KNN, NB
from pattern.db import csv
from pattern.en import ngrams
from pattern.vector import stem, PORTER, LEMMA
from nltk.corpus import stopwords
import re
import csv as csv1

# Load the labelled training rows. 'FbTrainingData.csv' is opened by a bare
# relative file name, so it is expected to sit next to this script.

data = csv('FbTrainingData.csv')
# Each row is unpacked as (message, side_effect_indicator); the indicator is
# read as text and cast to int.
data = [[message, int(side_effect_indicator)] for message, side_effect_indicator in data]




# List of nltk English stopwords
stop = stopwords.words('english')




# Medicine names and other obvious terms, grouped in one list per drug family,
# meant to be treated as additional stop words.
# NOTE(review): "riomet" appears twice in medlist2 and "medformin" looks like a
# misspelling of "metformin" -- possibly deliberate to match misspelled posts;
# confirm before changing, since these strings are matched at runtime.
medlist1 = ["diabetes","actos", "pioglitazone hydrochloride", "pioglitazone",  "glustin", "glizone", "pioz", "zactos"]

medlist2 = ["medformin","glucophage", "metformin", "glucophage xr", "metformin hydrochloride", "carbophage sr", "riomet", "fortamet", "glumetza", "obimet", "gluformin", "dianben", "diabex", "diaformin", "siofor","metfogamma", "riomet"]

medlist3 = ["byetta", "bydureon", "exenatide"]