Example #1
0
 def __init__(self, k=1):
     # phrase detector
     self.phrase_detector = PmiPhraseDetector(RawSentenceStream())
     # number converter
     self.tokenizer = RawTokenizer()
     # build model
     self.model = W2Vmodel(PhraseSentenceStream(self.phrase_detector))
     # parameters
     self.p = 0.8
     self.k = k
Example #2
0
 def __init__(self, alpha=6.0):
     # phrase detector
     self.phrase_detector = PmiPhraseDetector(RawSentenceStream())
     # number converter
     self.tokenizer = RawTokenizer()
     # build model
     self.w2v = W2Vmodel(PhraseSentenceStream(self.phrase_detector))
     self.tfidf = TFIDFmodel()
     # parameters
     self.alpha = alpha
     self.k = 3
     self.p = 0.80
Example #3
0
    def __init__(self):
        self.w2v = W2Vmodel()
        xs = []
        ys = []
        with open('data/misc/domain_classify.txt', 'r') as infile:
            lines = infile.read().split("\n")
            for line in lines:
                word, y = line.split(',')
                word_vec = self.w2v.inner_model[word]
                xs.append(word_vec)
                ys.append(y)

        self.clf = svm.SVC()
        self.clf.fit(xs, ys)
import logging
from irmodels.W2Vmodel import W2Vmodel
from textanalysis.texts import PhraseSentenceStream, RawSentenceStream
from textanalysis.phrasedetection import PmiPhraseDetector
import numpy as np
from random import sample
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# setup logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
# phrase detector
phrase_detector = PmiPhraseDetector(RawSentenceStream())
# build model
m = W2Vmodel(PhraseSentenceStream(phrase_detector))

diseases = {}
symptoms = {}

with open("testdiseases.txt", 'r') as infile:
    for line in infile.read().split("\n")[:-1]:
        parts = line.split(",")
        diseases[parts[1]] = int(parts[0])

with open("testsymptoms.txt", 'r') as infile:
    for line in infile.read().split("\n")[:-1]:
        symptoms[line] = 1

# disease data
keywords = diseases.keys()