Ejemplo n.º 1
0
def main(doc="/home/daniel/data/Ciclo6/Tesis2/xmlSampleFile.xml"):
    """Build and print a summary for every entity found in a tweet XML file.

    Steps, in order:
      1. Parse ``doc`` and, for each tweet, register the entities it
         mentions in the corpus and add the tweet to each of those
         entities via ``addReview``.
      2. Run LSA over the tweets (SVD, dimension reduction, matrix
         reconstruction) and pass it to
         ``corpus.assignSemanticSimilarity``.
      3. Pass a ``sentiStrength`` instance to
         ``corpus.assignPolaritySimilarity``.
      4. For each entity: obtain leaders and communities, assign order,
         run the full parsing, and print the generated summary followed
         by an empty line.

    Args:
        doc: Path of the XML file to process. Defaults to the sample file
            that used to be hard-coded, so a bare ``main()`` call behaves
            exactly as before. Other files previously swapped in by hand
            (same directory): ``stompol-tweets-train-tagged.xml``,
            ``xmlSampleFile2.xml`` and ``xmlStandardFile.xml``.
    """
    xmlparser = XML.XmlParser(doc)
    tweets = xmlparser.root
    corpus = COR.Corpus()

    for tweet in tweets:
        tweetEntities = xmlparser.extractEntity(tweet)
        # Entities must be registered before they can be looked up below.
        corpus.addNewEntities(tweetEntities)
        for tweetEntity in tweetEntities:
            entity = corpus.getEntity(tweetEntity)
            entity.addReview(tweet)

    lsa = LAT.LSA(tweets)
    lsa.singularValueDecomposition()
    lsa.reduceDimension()
    lsa.reconstructMatrix()
    corpus.assignSemanticSimilarity(lsa)

    sentiStrength = SENSTR.sentiStrength()
    corpus.assignPolaritySimilarity(sentiStrength)

    for entity in corpus.entities:
        entity.obtainLeaders()
        entity.obtainCommunities()
        entity.assignOrder()
        entity.fullParsing()
        print(entity.generateSummary())
        print()
Ejemplo n.º 2
0
    def m_button1OnButtonClick(self, event):
        """Run the evaluation and fill the result widgets when button 1 is clicked.

        Parses the XML through ``xmlparser.XmlParser().parse()``, builds a
        ``Patient`` from it and a ``Karkinos`` from the patient, then writes
        the four computed texts into their text controls and updates the
        status label.

        Args:
            event: The click event delivered by the GUI toolkit; not used.
        """
        parsed = xmlparser.XmlParser().parse()
        patient = Patient(parsed)
        karkinos = Karkinos(patient)

        # Compute and display one field at a time, in the original order.
        index_text = self.__index_case(karkinos, patient)
        self.text_indice.SetValue(index_text)

        informative_text = self.__informativo(karkinos)
        self.text_informativo.SetValue(informative_text)

        associated_text = self.__casos_asociados(karkinos)
        self.text_asociados.SetValue(associated_text)

        diagnosis_text = self.__diagnosis(karkinos)
        self.text_diagnostico.SetValue(diagnosis_text)

        self.label_evaluation.SetLabel("Evaluación completada con éxito")
Ejemplo n.º 3
0
    def __init__(self, id):
        """Load 'expo.xml' and build the 2-D grid of Zone objects.

        Args:
            id: Identifier stored on the instance as ``self.id``.

        After construction ``self.zones[i][j]`` holds the ``zone.Zone``
        created for ``i`` in ``range(self.getLargeur())`` and ``j`` in
        ``range(self.getHauteur())``.
        """
        self.xml = xmlparser.XmlParser('expo.xml')
        self.id = id

        self.zones = []
        for col in range(self.getLargeur()):
            # Note the swapped index order: Zone receives (col, row) while
            # the parser is queried with isZone(row, col).
            column = [
                zone.Zone(col, row, self.xml.isZone(row, col))
                for row in range(self.getHauteur())
            ]
            self.zones.append(column)
Ejemplo n.º 4
0
class extractEntityMethod(unittest.TestCase):
    """Tests for ``XmlParser.extractEntity`` on the first three tweets of the test file.

    The XML file is parsed once, when the class body is executed, and the
    parser and its root are shared by all test methods as class attributes.
    """

    doc = r"D:\Ciclo 6\Tesis 2\Tesis\source\test\xmltestfile.xml"

    xmlparser = XML.XmlParser(doc)
    tweets = xmlparser.root

    def testEntityExtractionSingle(self):
        """Tweet 0 yields a single entity."""
        expected = ['Partido_Popular']
        found = self.xmlparser.extractEntity(self.tweets[0])
        self.assertEqual(found, expected)

    def testEntityExtractionMultiple(self):
        """Tweet 1 yields two entities, in this order."""
        expected = ['Partido_Socialista_Obrero_Espanol', 'Partido_Popular']
        found = self.xmlparser.extractEntity(self.tweets[1])
        self.assertEqual(found, expected)

    def testEntityExtractionMultipleOnOneEntity(self):
        """Tweet 2 yields five entities, in this order."""
        expected = [
            'Partido_Popular',
            'Podemos',
            'Partido_Socialista_Obrero_Espanol',
            'Izquierda_Unida',
            'Ciudadanos',
        ]
        found = self.xmlparser.extractEntity(self.tweets[2])
        self.assertEqual(found, expected)
Ejemplo n.º 5
0
class addNewEntitiesMethod(unittest.TestCase):
    """Tests for ``Corpus.addNewEntities`` using tweets from the test XML file.

    The XML file is parsed once, when the class body is executed; every
    test gets its own ``Corpus`` from ``setUp``.
    """

    doc = r"D:\Ciclo 6\Tesis 2\Tesis\source\test\xmltestfile.xml"

    xmlparser = XML.XmlParser(doc)
    tweets = xmlparser.root

    def setUp(self):
        """Create the corpus under test."""
        self.corpus = COR.Corpus()

    def tearDown(self):
        """Empty the corpus entity list after every test, pass or fail.

        This used to be the last statement of each test, where a failing
        assertion skipped it. Presumably the list can outlive a single
        Corpus instance (otherwise the fresh corpus from setUp would make
        the cleanup unnecessary) -- TODO confirm against COR.Corpus.
        """
        del self.corpus.entities[:]

    def testAddNewEntitiesSingle(self):
        """Adding the entities of tweet 0 leaves exactly one entity."""
        tweetEntities = self.xmlparser.extractEntity(self.tweets[0])
        self.corpus.addNewEntities(tweetEntities)
        self.assertEqual(self.corpus.asEntityList(), ['Partido_Popular'])

    def testAddNewEntitiesMultiple(self):
        """Adding the entities of tweet 1 leaves two entities, in order."""
        tweetEntities = self.xmlparser.extractEntity(self.tweets[1])
        self.corpus.addNewEntities(tweetEntities)
        self.assertEqual(
            self.corpus.asEntityList(),
            ['Partido_Socialista_Obrero_Espanol', 'Partido_Popular'])
Ejemplo n.º 6
0
                            continue
                        else:
                            zonesDejaComptees.append([x+1, y])
                    
                    #Si on se rend ici, on ajoute un kiosque
                    self.nbKiosques += 1
    
    def aDejaEteComptee(self, zonesDejaComptees, x, y):
        """Tell whether the coordinates (x, y) were already counted.

        Args:
            zonesDejaComptees: Iterable of already-counted entries; an entry
                matches when it compares equal to the list ``[x, y]``.
            x: First coordinate to look for.
            y: Second coordinate to look for.

        Returns:
            True if some entry equals ``[x, y]``, False otherwise.
        """
        target = [x, y]
        return any(entry == target for entry in zonesDejaComptees)

if __name__ == '__main__':
    # Manual run: load the layout from 'expo.xml', open the SQLite
    # database, build the zone grid and create the exhibitor report view.
    import xmlparser
    xml = xmlparser.XmlParser('expo.xml')

    app = QApplication([])
    qdb = db.Database()
    qdb.openSqlConnection("QSQLITE", "db.sqlite")

    # zones[i][j] is the Zone for i in range(largeur), j in range(hauteur);
    # the parser is queried with the indices swapped: isZone(j, i).
    zones = []
    for i in range(xml.getLargeur()):
        column = [zone.Zone(i, j, xml.isZone(j, i))
                  for j in range(xml.getHauteur())]
        zones.append(column)

    v = VueRapportExposant(100, zones)
    # Alternative view, currently disabled: v = VueRapportGeneral(zones)
import xmlparser
import nlp
from gensim import corpora, models, similarities
from gensim.models import Phrases
import os
import re

import logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s : %(levelname)s : %(message)s')

# Inclusive page window used to select paragraphs.
document_start = 11
document_end = 500

parser = xmlparser.XmlParser("snowflakethesis.xml")
document = parser.refactorAllText(7, 10)

mode = 0
# A paragraph is kept only when splitting its text on '. ' yields more
# pieces than this threshold.
min_sent_len_paragraph = 0
doc_paragraphs = []

# Collect the paragraphs inside the page window that pass the threshold.
for paragraph in document.paragraphs:
    if not (document_start <= paragraph.page <= document_end):
        continue
    if len(paragraph.paragraph.split('. ')) <= min_sent_len_paragraph:
        continue
    doc_paragraphs.append(paragraph)

# If the saved dictionary and corpus files are present, load them;
# otherwise create them.
# NOTE(review): the existence check looks for 'snowflakethesisa.dict' and
# 'snowflakethesisa.mm' (extra trailing "a"), while the loads below read
# 'snowflakethesis.dict' and 'snowflakethesis.mm'. Either this is a typo,
# or the "a" was added on purpose to skip this branch and force a rebuild
# -- TODO confirm and make the names agree.
if (os.path.exists('snowflakethesisa.dict')
        and os.path.exists('snowflakethesisa.mm')):
    dictionary = corpora.Dictionary.load('snowflakethesis.dict')
    corpus = corpora.MmCorpus('snowflakethesis.mm')
    print('Used saved files')