Ejemplo n.º 1
0
    def __Load(self):
        """Load the unigram and bigram taggers from their saved files.

        Returns:
            True when neither loaded value compares equal to ``False``,
            otherwise False.
        """
        unigram = SaveLoad.LoadByte(self.__unigram_taggerFileName)
        self.__unigram_tagger = unigram
        bigram = SaveLoad.LoadByte(self.__bigram_taggerFileName)
        self.__bigram_tagger = bigram

        # An explicit comparison with False (not plain truthiness) is used, so
        # only values equal to False count as a failed load.
        # NOTE(review): presumably LoadByte returns False on failure - confirm
        # in SaveLoad.
        both_loaded = bigram != False and unigram != False
        return True if both_loaded else False
Ejemplo n.º 2
0
    def __init__(self):
        """Configure the data paths, load the word frequencies and run every
        extraction step in sequence."""
        # Value handed to EstraiYahooAnswers as the last setup step.
        self.qta = 155

        # Every resource lives under the same base folder.
        base = "risorse\\Dati\\"
        self.__folder = base
        self.__foldertaggedsents = base + "training\\paisaTagged"
        self.wordsFreqFilename = base + "wordFreq.fdist"
        self.stopWordsFilename = base + "StopWords.list"
        self.folderDialoghiPosTag = base + "dialoghi\\postag\\"

        self.domandeMostFreqFilename = base + "domandeMostFreq.txt"
        self.sentimentiMostFreqFilename = base + "sentimentiMostFreq.txt"
        self.emozioniMostFreqFilename = base + "emozioniMostFreq.txt"

        # NOTE: only the word frequencies are loaded here; loading of the
        # stop-word list is currently disabled.
        self.wordsFreq = SaveLoad.LoadByte(self.wordsFreqFilename)

        # Filled by ElaboraDati with the NOUN / VER tokens of the dialogues.
        self.noun = set()
        self.verb = set()

        self.SentimentiMostFreq()
        self.EmozioniMostFreq()
        # Questions taken from the internet as popular "in-culture" plus
        # Google keywords.
        self.DomaneMostFreq()

        # Keywords taken from the dialogues and shuffled randomly.
        self.ElaboraDati()
        self.EstraiMostFreq()

        self.EstraiYahooAnswers(self.qta)
Ejemplo n.º 3
0
 def Load(self):
     """Load the sentiment classifier from ``classificatoreSentiFilename``.

     Returns:
         True when the loaded object is truthy, False otherwise.
     """
     self.classificatoreSenti = SaveLoad.LoadByte(
         self.classificatoreSentiFilename)
     return True if self.classificatoreSenti else False
Ejemplo n.º 4
0
 def __Load(self):
     """Load the sentiment word lists and the stop-word list.

     Fills ``self.dictsentiScore`` (stem -> +1.0 / -1.0), ``self.sentiPos``
     and ``self.sentiNeg`` from ``SentiPos.txt`` / ``SentiNeg.txt``, then
     loads the stop words.  When the stop words are missing the extractor
     is run once and only the stop-word list is reloaded.

     Returns:
         True when the stop words are available, False when they are still
         missing after the extraction.
     """
     # Load the raw word lists.
     pos = SaveLoad.LoadLines(self.__folder + "SentiPos.txt")
     neg = SaveLoad.LoadLines(self.__folder + "SentiNeg.txt")

     self.dictsentiScore = {}
     self.sentiPos = list()
     self.sentiNeg = list()

     # Both lexicons get the same treatment; only the score and the target
     # list differ.  Positive words are processed first, as before.
     lexicons = (
         (pos, float(1), self.sentiPos),
         (neg, float(-1), self.sentiNeg),
     )
     for lines, score, bucket in lexicons:
         for line in lines:
             tagged = self.__PosTagger.PosTag(line.strip())
             # An empty list from the tagger means there is nothing to keep.
             if tagged != list():
                 stemmed = self.__Stem.StemWord(tagged[0])
                 # The first element of the stemmer result is the key.
                 self.dictsentiScore[stemmed[0]] = score
                 bucket.append(stemmed[0])

     self.__stopWords = SaveLoad.LoadByte(self.__stopWordsFilename)
     if not self.__stopWords:
         print("file stopwords mancante...\nestrazione in corso")
         paisaWordsDataExtractor.paisaWordsDataExtractor()

         # Reload only the stop words, exactly once.  Re-entering __Load()
         # here would recurse without end (rerunning the extractor each
         # time) whenever the extraction does not produce the file, and the
         # failure branch below could never be reached.
         self.__stopWords = SaveLoad.LoadByte(self.__stopWordsFilename)
         if not self.__stopWords:
             print("processo di estrazione corrotto\nimpossibile procedere...")
             return False
     return True
Ejemplo n.º 5
0
    def __CaricaDlg(self, dlgname):
        """Load the raw lines and the two word dictionaries of a dialogue.

        Sets ``self.dlg``, ``self.dictWordScoreRow`` and
        ``self.dictWordIndex``.  When either dictionary is missing (falsy)
        they are regenerated through ``CreaDatiQuestEngine`` and loaded again.

        Args:
            dlgname: base name of the dialogue, without folder or extension.
        """
        score_path = self.folder + dlgname + self.extdictWordScoreRow
        index_path = self.folder + dlgname + self.extdictWordIndex

        self.dlg = SaveLoad.LoadLines(self.folder + "\\dialoghiRaw\\" +
                                      dlgname + self.extdlg)
        self.dictWordScoreRow = SaveLoad.LoadByte(score_path)
        self.dictWordIndex = SaveLoad.LoadByte(index_path)

        if not (self.dictWordScoreRow and self.dictWordIndex):
            print("file dict mancanti\ncreazione in corso...")
            CreaDatiQuestEngine.CreaDatiQuestEngine(dlgname)
            # Reload the freshly created data.  The score dictionary must go
            # back into dictWordScoreRow: a misspelled attribute name here
            # would leave the failed value in place.
            self.dictWordScoreRow = SaveLoad.LoadByte(score_path)
            self.dictWordIndex = SaveLoad.LoadByte(index_path)
Ejemplo n.º 6
0
    def __Load(self):
        """
            
            Load
            
            input: None
            hidden: loads the grammar data; when the loaded value is falsy,
                    runs GrammarExtractor and loads the data again
            output: always True
        
        """

        self.__grammars = SaveLoad.LoadByte(self.__grammarsFilename)
        if self.__grammars:
            return True

        # Nothing usable on disk: build the grammars, then load them again.
        # The module is imported here, only when it is actually needed.
        import GrammarExtractor
        GrammarExtractor.GrammarExtractor()
        self.__grammars = SaveLoad.LoadByte(self.__grammarsFilename)
        return True
Ejemplo n.º 7
0
def __Load():
    """Check that the stop-word list can be loaded, extracting it if needed.

    When the first load gives a falsy value the extractor is run once and
    the load is attempted a second time.

    Returns:
        True when the stop words could be loaded, False when they are still
        missing after the extraction.
    """
    # NOTE(review): there is no `global` statement, so __stopWords is a local
    # and the loaded list is discarded on return - confirm whether a
    # module-level variable was intended.
    __stopWords = SaveLoad.LoadByte(__stopWordsFilename)
    if __stopWords:
        return True

    print("file stopwords mancante...\nestrazione in corso")
    paisaWordsDataExtractor.paisaWordsDataExtractor()

    # Retry exactly once.  Calling __Load() recursively here would never
    # terminate (and would rerun the extractor each time) when the extraction
    # does not produce the file, making the failure branch unreachable.
    __stopWords = SaveLoad.LoadByte(__stopWordsFilename)
    if not __stopWords:
        print("processo di estrazione corrotto\nimpossibile procedere...")
        return False
    return True
Ejemplo n.º 8
0
 def ElaboraDati(self):
     """Collect the nouns and verbs found in the POS-tagged dialogue files.

     Every file under ``self.folderDialoghiPosTag`` is loaded and walked
     item by item; for each item, element 0 is added to ``self.noun`` when
     element 1 is "NOUN" and to ``self.verb`` when it is "VER".  A progress
     line is printed after each file.
     """
     paths = glob.glob(self.folderDialoghiPosTag + '*')
     total = len(paths)

     for done, path in enumerate(paths, 1):
         for sentence in SaveLoad.LoadByte(path):
             for token in sentence:
                 tag = token[1]
                 if tag == u"NOUN":
                     self.noun.add(token[0])
                 elif tag == u"VER":
                     self.verb.add(token[0])
         # Same text as a space-separated print of the four items.
         print("Elaborato %s / %s" % (done, total))
Ejemplo n.º 9
0
    def __Load(self):
        """
            
            Load
            
            input: None
            hidden: loads the sentence tokenizer; when the loaded value is
                    falsy, creates a ConllCorpusReader over the tagged
                    sentences folder instead
            output: True if the tokenizer was loaded, False otherwise
            
        """

        self.__sentsTokenizer = SaveLoad.LoadByte(self.__sentsTokenFileName)
        if self.__sentsTokenizer:
            return True

        # No saved tokenizer: expose the tagged corpus (every file in the
        # folder) before reporting the failed load.
        self.__corpus = nltk.corpus.ConllCorpusReader(
            self.__foldertaggedsents, '.*', self.__lst_pos)
        return False
Ejemplo n.º 10
0
 def __init__(self):
     """Set the data folder and load the stop-word list from it."""
     base = "risorse\\Dati\\"
     self.__folder = base
     self.stopWordFilename = base + "StopWords.list"
     self.stopWords = SaveLoad.LoadByte(self.stopWordFilename)
Ejemplo n.º 11
0
 def __CreaClassificatore(self):
     """Build the sentiment classifier from the saved training set and
     store it through ``self.Save()``."""
     training_set = SaveLoad.LoadByte(self.__SentimTrainsetFilename)
     self.classificatoreSenti = self.SKClassifierSVM(training_set)
     self.Save()
Ejemplo n.º 12
0
    def Load(self):
        """Load ``self.suffissiTag`` from ``suffissiTagFilename``.

        Returns:
            True when the loaded object is truthy, False otherwise.
        """
        self.suffissiTag = SaveLoad.LoadByte(self.suffissiTagFilename)
        return True if self.suffissiTag else False
Ejemplo n.º 13
0
    def __init__(self, dlgname):
        """Build and save the word lookup tables for the dialogue ``dlgname``.

        Loads the raw dialogue lines together with the saved ``.dat`` matrix
        and ``.voc`` vocabulary.  When the matrix is unusable both files are
        regenerated through ``tfidfDlgCreator.tfidfDataCreator`` and loaded
        again.  Two tables are then filled and saved next to the dialogue:

        * ``self.dictWordIndex``: word -> its index in the vocabulary
        * ``self.dictWordScoreRow``: word -> list of ``{score: row}`` dicts,
          one per non-zero matrix cell, where ``score`` is the cell value
          multiplied by the occurrences of the word in that dialogue line.

        Args:
            dlgname: base name of the dialogue, without folder or extension.
        """
        self.extdlg = ".txt"
        self.extdlgData = ".dat"
        self.extdlgVoc = ".voc"
        self.extdictWordIndex = '.dictWordIndex'
        self.extdictWordScoreRow = '.dictWordsScoreRows'

        self.folder = "risorse\\Dati\\dialoghi\\"

        dlg = SaveLoad.LoadLines(self.folder + "\\dialoghiRaw\\" + dlgname +
                                 self.extdlg)

        data_path = self.folder + dlgname + self.extdlgData
        voc_path = self.folder + dlgname + self.extdlgVoc
        dlgdata = SaveLoad.LoadByte(data_path)
        vocab = SaveLoad.LoadByte(voc_path)

        try:
            # Probe only: a value without a working mean() is treated as a
            # missing matrix.
            dlgdata.mean()
        except Exception:
            # `except Exception` instead of a bare except, so that
            # KeyboardInterrupt / SystemExit are no longer swallowed here.
            print("file .dat mancante\ncreazione in corso...")
            # Create the data files for this dialogue.
            tfidfDlgCreator.tfidfDataCreator(dlgname)

            # Reload the data.
            dlgdata = SaveLoad.LoadByte(data_path)
            vocab = SaveLoad.LoadByte(voc_path)

        # index -> word, and the reverse mapping word -> index.
        dictIndexWord = dict()
        self.dictWordIndex = dict()
        for index, term in enumerate(vocab):
            dictIndexWord[index] = term
            self.dictWordIndex[term] = index

        # word -> list of {score: row} dicts (later searched for the maximum).
        self.dictWordScoreRow = collections.defaultdict(list)

        rows = dlgdata.shape[0]
        cols = dlgdata.shape[1]
        for row in xrange(rows):
            # Tokens of this dialogue line, split lazily and only once per
            # row instead of once per non-zero cell.
            tokens = None
            for col in xrange(cols):
                prob = dlgdata[row, col]
                if prob != float(0):
                    word = dictIndexWord[col]
                    if tokens is None:
                        tokens = dlg[row].split()
                    score = tokens.count(word) * prob
                    self.dictWordScoreRow[word].append({score: row})

        del dlgdata

        # Same output as a space-separated print of the label and the result.
        print("file Saved: %s" % (SaveLoad.SaveByte(
            self.dictWordScoreRow,
            self.folder + dlgname + self.extdictWordScoreRow),))
        print("file Saved: %s" % (SaveLoad.SaveByte(
            self.dictWordIndex,
            self.folder + dlgname + self.extdictWordIndex),))
Ejemplo n.º 14
0
    for dato in dati:
        if len(dato[0]) > 3:
            i = dato[0][:2]
            fs = dato[0][-1:]
            fd = dato[0][-2:]
            ft = dato[0][-3:]

            feature = {'i': i, 'fs': fs, 'fd': fd, 'ft': ft}
            features.append([feature, dato[1]])

    return features


def Feature(dato):
    """Return the prefix/suffix features of ``dato``.

    The result maps 'i' to the first two items, 'fs' to the last one, 'fd'
    to the last two and 'ft' to the last three.  Inputs shorter than a slice
    simply give shorter values.
    """
    return {
        'i': dato[:2],
        'fs': dato[-1:],
        'fd': dato[-2:],
        'ft': dato[-3:],
    }


import SaveLoad

# Manual check: load the saved objects and classify one sample word.
svm = SaveLoad.LoadByte("SVM_mf.tmp")
# Loaded as well, although it is not used in the lines below.
dt = SaveLoad.LoadByte("NLTK_DTP_mf.tmp")

word = 'caschereste'
w = Feature(word)
print(svm.classify(w))
 def __Load(self):
     """Load the word list and replace it with its sorted unique entries.

     Prints the number of entries before and after removing duplicates.
     """
     loaded = SaveLoad.LoadByte(self.paroleFilename)
     self.parole = loaded
     print(len(loaded))
     self.parole = sorted(set(loaded))
     print(len(self.parole))