Ejemplo n.º 1
0
        import logging
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.INFO)
    from gensim import corpora, models
    texts = []
    for cas in cases:
        for srlSentence in cas.srlSentences:
            sentence = []
            for clause in srlSentence:
                for role, text in clause.iteritems():
                    sentence.append(str((role, text)))
            texts.append(sentence)
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    lsi = models.lsimodel.LsiModel(corpus, id2word=dictionary, num_topics=15)
    if save:
        lsi.save('models/lda/srlLSI')


if __name__ == "__main__":
    # Script entry point: load the saved cases, then run LSIWordVec().
    # NOTE(review): `data` and `LSIWordVec` are not defined in the visible
    # excerpt -- presumably imported/defined earlier in the file; confirm.
    cases = data.getAllSavedCases(senna=True)
    # `cases` is not passed explicitly; LSIWordVec() may read it as a module
    # global -- TODO confirm.
    LSIWordVec()
    # Alternative experiments, currently disabled:
    #srlLSI(cases, save=True)
    #wordVecSrlBigrams(cases)
    #cas = case.Case('6850872635911328292')
    #representation = wordVecSrl(cases)
    # The bare string below is disabled code kept as a no-op expression
    # statement; it is never executed.
    """
    for cas in cases:
        print getRepresentation(cas, log=True)
    """
Ejemplo n.º 2
0
                    if 'A0' in clause and clause['A0'] == word:
                        polyfitInput[0] += 1
                    if 'V' in clause and clause['V'] == word:
                        polyfitInput[0] += 1
                for clause in dm:
                    if 'A0' in clause and clause['A0'] == word:
                        polyfitInput[1] += 1
                    if 'V' in clause and clause['V'] == word:
                        polyfitInput[1] += 1
                print polyfitInput
                model[i] = np.polyfit([1, m], polyfitInput, 1)
            print model
    return findShortChains()

if __name__ == "__main__":
    cases = data.getAllSavedCases()
    labeledTraining = learning.findLabels(cases)
    learning.readLabels(labeledTraining)
    unlabeledCases = filter(lambda x:x not in labeledTraining, cases)
    unlabeledTraining = unlabeledCases[:-1]
    testing = [unlabeledCases[-1]]
    learning.labelCases(labeledTraining, unlabeledTraining, testing, numIterations=20)
    for cas in testing:
        print 'case ' + str(cas.name)
        d = {}
        for person, summarySentences in cas.summary.iteritems():
            personSentences = []
            for sentence in summarySentences:
                personSentences.append(cas.sentences[sentence])
            d[person] = personSentences
        for person, summarySentences in d.iteritems():
Ejemplo n.º 3
0
import data
from case import Case
import random

if __name__ == "__main__":
    # Script entry point: repeatedly pick a random saved case and build a
    # Case object for every link returned by data.getAboutCases().
    # The meaning of makeString/senna is defined in the `data` module, which
    # is not visible here; both are switched off for this run.
    cases = data.getAllSavedCases(makeString=False, senna=False)
    # No break/exit condition is visible in this excerpt, so the loop runs
    # until the process is interrupted or an exception is raised.
    while True:
        cas = random.choice(cases)
        moreLinks = data.getAboutCases(cas)
        print moreLinks
        for i, link in enumerate(moreLinks):
            # Progress indicator: index of the link being processed.
            print i
            # NOTE(review): `c` is not used in the visible code -- presumably
            # constructing Case has a side effect (e.g. fetching/saving the
            # case); confirm against case.Case.
            c = Case(link, senna=False)
Ejemplo n.º 4
0
def srlLSI(cases, log=True, save=False, numTopics=15):
    """Train a gensim LSI model over the SRL clauses of the given cases.

    Every SRL sentence of every case becomes one document. The tokens of a
    document are the string form of each ``(role, text)`` pair found in the
    sentence's clauses.

    Args:
        cases: iterable of objects exposing ``srlSentences``; each SRL
            sentence is an iterable of dict-like clauses mapping a role to
            its text.
        log: when true, configure the root logger at INFO level with a
            timestamped format before training.
        save: when true, persist the trained model to ``models/lda/srlLSI``.
        numTopics: number of latent topics requested from ``LsiModel``
            (defaults to 15, the value that used to be hard-coded).

    Returns:
        The trained ``LsiModel``. Previously nothing was returned, so the
        model was lost unless ``save`` was set.
    """
    if log:
        import logging
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.INFO)
    # Imported lazily so that merely importing this module does not require
    # gensim to be installed.
    from gensim import corpora, models

    texts = []
    for cas in cases:
        for srlSentence in cas.srlSentences:
            # ``items()`` works on both Python 2 and 3 (``iteritems`` is
            # Python 2 only) and yields the pairs in the same order.
            texts.append([str((role, text))
                          for clause in srlSentence
                          for role, text in clause.items()])

    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    lsi = models.lsimodel.LsiModel(corpus, id2word=dictionary,
                                   num_topics=numTopics)
    if save:
        lsi.save('models/lda/srlLSI')
    return lsi

if __name__ == "__main__":
    # Script entry point: load the saved cases, then run LSIWordVec().
    # NOTE(review): `data` and `LSIWordVec` are not defined in the visible
    # excerpt -- presumably imported/defined earlier in the file; confirm.
    cases = data.getAllSavedCases(senna=True)
    # `cases` is not passed explicitly; LSIWordVec() may read it as a module
    # global -- TODO confirm.
    LSIWordVec()
    # Alternative experiments, currently disabled:
    #srlLSI(cases, save=True)
    #wordVecSrlBigrams(cases)
    #cas = case.Case('6850872635911328292')
    #representation = wordVecSrl(cases)
    # The bare string below is disabled code kept as a no-op expression
    # statement; it is never executed.
    """
    for cas in cases:
        print getRepresentation(cas, log=True)
    """
Ejemplo n.º 5
0
                    if 'V' in clause and clause['V'] == word:
                        polyfitInput[0] += 1
                for clause in dm:
                    if 'A0' in clause and clause['A0'] == word:
                        polyfitInput[1] += 1
                    if 'V' in clause and clause['V'] == word:
                        polyfitInput[1] += 1
                print polyfitInput
                model[i] = np.polyfit([1, m], polyfitInput, 1)
            print model

    return findShortChains()


if __name__ == "__main__":
    cases = data.getAllSavedCases()
    labeledTraining = learning.findLabels(cases)
    learning.readLabels(labeledTraining)
    unlabeledCases = filter(lambda x: x not in labeledTraining, cases)
    unlabeledTraining = unlabeledCases[:-1]
    testing = [unlabeledCases[-1]]
    learning.labelCases(labeledTraining,
                        unlabeledTraining,
                        testing,
                        numIterations=20)
    for cas in testing:
        print 'case ' + str(cas.name)
        d = {}
        for person, summarySentences in cas.summary.iteritems():
            personSentences = []
            for sentence in summarySentences: