def main():
    # Alternative corpora kept for reference:
    # doc = "/home/daniel/data/Ciclo6/Tesis2/stompol-tweets-train-tagged.xml"
    doc = "/home/daniel/data/Ciclo6/Tesis2/xmlSampleFile.xml"
    # doc = "/home/daniel/data/Ciclo6/Tesis2/xmlSampleFile2.xml"
    # doc = "/home/daniel/data/Ciclo6/Tesis2/xmlStandardFile.xml"

    # Parse the tagged tweet corpus and register every entity it mentions.
    xmlparser = XML.XmlParser(doc)
    tweets = xmlparser.root
    corpus = COR.Corpus()
    for tweet in tweets:
        tweetEntities = xmlparser.extractEntity(tweet)
        corpus.addNewEntities(tweetEntities)
        for tweetEntity in tweetEntities:
            entity = corpus.getEntity(tweetEntity)
            entity.addReview(tweet)

    # Latent semantic analysis: SVD, rank reduction, reconstruction.
    lsa = LAT.LSA(tweets)
    lsa.singularValueDecomposition()
    lsa.reduceDimension()
    lsa.reconstructMatrix()
    corpus.assignSemanticSimilarity(lsa)

    # Polarity scores via SentiStrength.
    sentiStrength = SENSTR.sentiStrength()
    corpus.assignPolaritySimilarity(sentiStrength)

    # Summarize each entity from its leader tweets and communities.
    for entity in corpus.entities:
        entity.obtainLeaders()
        entity.obtainCommunities()
        entity.assignOrder()
        entity.fullParsing()
        print(entity.generateSummary())
        print()
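The LAT.LSA internals are not shown in this snippet. As a point of reference, the three calls (singularValueDecomposition, reduceDimension, reconstructMatrix) suggest a rank-k truncated SVD, which can be sketched with numpy; the lsa_reconstruct function and the toy matrix below are illustrations, not the project's actual code:

# Minimal sketch of a truncated-SVD (LSA) step; the real LAT.LSA class is not
# shown here, so the matrix construction below is an assumption.
import numpy as np

def lsa_reconstruct(term_doc_matrix, k):
    """Rank-k reconstruction of a term-document matrix."""
    # Full SVD: A = U * diag(s) * Vt
    U, s, Vt = np.linalg.svd(term_doc_matrix, full_matrices=False)
    # Keep only the k largest singular values (dimension reduction).
    U_k, s_k, Vt_k = U[:, :k], s[:k], Vt[:k, :]
    # Reconstruct the smoothed matrix used for semantic similarity.
    return U_k @ np.diag(s_k) @ Vt_k

# Example: 4 terms x 3 documents, reduced to rank 2.
A = np.array([[1., 0., 1.], [0., 1., 0.], [1., 1., 0.], [0., 0., 1.]])
A_k = lsa_reconstruct(A, k=2)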
def m_button1OnButtonClick(self, event):
    # Parse the patient record and run the Karkinos evaluation.
    xml = xmlparser.XmlParser().parse()
    patient = Patient(xml)
    karkinos = Karkinos(patient)
    self.text_indice.SetValue(self.__index_case(karkinos, patient))
    self.text_informativo.SetValue(self.__informativo(karkinos))
    self.text_asociados.SetValue(self.__casos_asociados(karkinos))
    self.text_diagnostico.SetValue(self.__diagnosis(karkinos))
    # "Evaluation completed successfully"
    self.label_evaluation.SetLabel("Evaluación completada con éxito")
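The SetValue/SetLabel calls indicate a wxPython form, but the binding that routes the button click to this handler is not shown. A minimal sketch of the assumed wiring; the widget names match the handler above, while the frame class and labels are illustrative scaffolding:

# Minimal sketch of the assumed wxPython wiring (standard event binding);
# everything except the widget names is illustrative.
import wx

class EvaluationFrame(wx.Frame):
    def __init__(self):
        super().__init__(None, title="Karkinos")
        panel = wx.Panel(self)
        self.m_button1 = wx.Button(panel, label="Evaluar")
        self.label_evaluation = wx.StaticText(panel, label="")
        # Route the click event to the handler shown above.
        self.m_button1.Bind(wx.EVT_BUTTON, self.m_button1OnButtonClick)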
def __init__(self, id):
    self.xml = xmlparser.XmlParser('expo.xml')
    self.id = id
    # Build the floor grid; note that isZone takes (row, column), i.e. (j, i).
    self.zones = []
    for i in range(self.getLargeur()):      # getLargeur: grid width
        tmp = []
        for j in range(self.getHauteur()):  # getHauteur: grid height
            z = zone.Zone(i, j, self.xml.isZone(j, i))
            tmp.append(z)
        self.zones.append(tmp)
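The XmlParser used by this constructor (and by the report script further down) is not included in the snippet. A hypothetical sketch of the interface the grid code assumes; the 'expo.xml' schema, tag names, and attribute names are all guesses:

# Hypothetical sketch of the assumed XmlParser interface; the actual
# expo.xml schema is not shown, so element and attribute names are guesses.
import xml.etree.ElementTree as ET

class XmlParser:
    def __init__(self, path):
        self.root = ET.parse(path).getroot()

    def getLargeur(self):            # grid width
        return int(self.root.get('largeur'))

    def getHauteur(self):            # grid height
        return int(self.root.get('hauteur'))

    def isZone(self, row, col):      # True if a zone is declared at (row, col)
        return any(int(z.get('x')) == col and int(z.get('y')) == row
                   for z in self.root.iter('zone'))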
class extractEntityMethod(unittest.TestCase):
    doc = r"D:\Ciclo 6\Tesis 2\Tesis\source\test\xmltestfile.xml"
    xmlparser = XML.XmlParser(doc)
    tweets = xmlparser.root

    def testEntityExtractionSingle(self):
        self.assertEqual(self.xmlparser.extractEntity(self.tweets[0]),
                         ['Partido_Popular'])

    def testEntityExtractionMultiple(self):
        self.assertEqual(
            self.xmlparser.extractEntity(self.tweets[1]),
            ['Partido_Socialista_Obrero_Espanol', 'Partido_Popular'])

    def testEntityExtractionMultipleOnOneEntity(self):
        self.assertEqual(self.xmlparser.extractEntity(self.tweets[2]), [
            'Partido_Popular', 'Podemos', 'Partido_Socialista_Obrero_Espanol',
            'Izquierda_Unida', 'Ciudadanos'
        ])
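These tests pin down extractEntity's contract (an ordered list of entity names per tweet element) without showing its body. A minimal sketch consistent with that contract; the 'entity' attribute and the '|' separator are assumptions, since the xmltestfile.xml schema is not shown:

# Hypothetical sketch of extractEntity, consistent with the tests above;
# the attribute name and separator are assumptions.
def extractEntity(self, tweet):
    raw = tweet.get('entity', '')
    # Preserve order and drop duplicates, since the expected lists are ordered.
    seen, entities = set(), []
    for name in raw.split('|'):
        if name and name not in seen:
            seen.add(name)
            entities.append(name)
    return entities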
class addNewEntitiesMethod(unittest.TestCase):
    doc = r"D:\Ciclo 6\Tesis 2\Tesis\source\test\xmltestfile.xml"
    xmlparser = XML.XmlParser(doc)
    tweets = xmlparser.root

    def setUp(self):
        self.corpus = COR.Corpus()

    def testAddNewEntitiesSingle(self):
        tweetEntities = self.xmlparser.extractEntity(self.tweets[0])
        self.corpus.addNewEntities(tweetEntities)
        self.assertEqual(self.corpus.asEntityList(), ['Partido_Popular'])
        del self.corpus.entities[:]

    def testAddNewEntitiesMultiple(self):
        tweetEntities = self.xmlparser.extractEntity(self.tweets[1])
        self.corpus.addNewEntities(tweetEntities)
        self.assertEqual(
            self.corpus.asEntityList(),
            ['Partido_Socialista_Obrero_Espanol', 'Partido_Popular'])
        del self.corpus.entities[:]
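Since setUp builds a fresh Corpus before every test, the trailing del statements are redundant; if explicit cleanup is still wanted, unittest's tearDown hook is the idiomatic place for it. A minimal sketch, to be placed inside addNewEntitiesMethod:

    def tearDown(self):
        # Runs after every test method; replaces the repeated
        # 'del self.corpus.entities[:]' lines above.
        del self.corpus.entities[:]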
    # (snippet begins mid-method; the enclosing loop and test are truncated)
                continue
            else:
                zonesDejaComptees.append([x+1, y])
        # If we get this far, we add a kiosk.
        self.nbKiosques += 1

    def aDejaEteComptee(self, zonesDejaComptees, x, y):
        # Return True if zone (x, y) has already been counted.
        for i in zonesDejaComptees:
            if i == [x, y]:
                return True
        return False


if __name__ == '__main__':
    import xmlparser
    # Assumed imports, not shown in the original snippet:
    # from PyQt5.QtWidgets import QApplication   (or PyQt4.QtGui)
    # import db, zone
    # VueRapportExposant / VueRapportGeneral come from the views module
    xml = xmlparser.XmlParser('expo.xml')
    app = QApplication([])
    qdb = db.Database()
    qdb.openSqlConnection("QSQLITE", "db.sqlite")

    # Build the zone grid exactly as the constructor above does.
    zones = []
    for i in range(xml.getLargeur()):
        tmp = []
        for j in range(xml.getHauteur()):
            z = zone.Zone(i, j, xml.isZone(j, i))
            tmp.append(z)
        zones.append(tmp)

    v = VueRapportExposant(100, zones)
    #~ v = VueRapportGeneral(zones)
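aDejaEteComptee does a linear scan on every lookup, so the kiosk count is quadratic in the number of zones. A set of (x, y) tuples gives the same membership test in constant time; a sketch, assuming the caller is changed to store tuples in a set:

# Constant-time variant; assumes zonesDejaComptees is a set of (x, y) tuples
# built with zonesDejaComptees.add((x+1, y)) on the caller's side.
def aDejaEteComptee(self, zonesDejaComptees, x, y):
    return (x, y) in zonesDejaComptees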
import xmlparser
import nlp
from gensim import corpora, models, similarities
from gensim.models import Phrases
import os
import re
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

document_start = 11
document_end = 500

parser = xmlparser.XmlParser("snowflakethesis.xml")
document = parser.refactorAllText(7, 10)
doc_paragraphs = []
mode = 0
min_sent_len_paragraph = 2

# We only want to work with paragraphs that have more than two sentences.
for paragraph in document.paragraphs:
    if paragraph.page >= document_start and paragraph.page <= document_end:
        if len(paragraph.paragraph.split('. ')) > min_sent_len_paragraph:
            doc_paragraphs.append(paragraph)

# If we have saved files, just use them; otherwise create them.
if (os.path.exists('snowflakethesis.dict')
        and os.path.exists('snowflakethesis.mm')):
    dictionary = corpora.Dictionary.load('snowflakethesis.dict')
    corpus = corpora.MmCorpus('snowflakethesis.mm')
    print('Used saved files')
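# The snippet cuts off before the else branch. A minimal sketch of what it
# presumably does with gensim (build and persist the dictionary and corpus);
# the tokenization via lower().split() is a placeholder assumption, since
# the nlp module's API is not shown here.
else:
    texts = [p.paragraph.lower().split() for p in doc_paragraphs]
    dictionary = corpora.Dictionary(texts)
    dictionary.save('snowflakethesis.dict')
    bow_corpus = [dictionary.doc2bow(text) for text in texts]
    corpora.MmCorpus.serialize('snowflakethesis.mm', bow_corpus)
    corpus = corpora.MmCorpus('snowflakethesis.mm')
    print('Created and saved new files')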