# NOTE(review): this line is a whitespace-mangled FRAGMENT — it duplicates the
# body of srlLSI() (defined in full further down in this file) with the
# `def srlLSI(cases, log=True, save=False):` header cut off.  As written it is
# not valid top-level code: `cases` is used before the `__main__` guard defines
# it, and `save` is never bound.  Presumably a bad paste of the srlLSI chunk —
# confirm against version control and delete this duplicate.
# NOTE(review): `clause.iteritems()` and the commented-out calls are Python 2
# only; `LSIWordVec` is not defined anywhere in the visible file — verify.
import logging logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) from gensim import corpora, models texts = [] for cas in cases: for srlSentence in cas.srlSentences: sentence = [] for clause in srlSentence: for role, text in clause.iteritems(): sentence.append(str((role, text))) texts.append(sentence) dictionary = corpora.Dictionary(texts) corpus = [dictionary.doc2bow(text) for text in texts] lsi = models.lsimodel.LsiModel(corpus, id2word=dictionary, num_topics=15) if save: lsi.save('models/lda/srlLSI') if __name__ == "__main__": cases = data.getAllSavedCases(senna=True) LSIWordVec() #srlLSI(cases, save=True) #wordVecSrlBigrams(cases) #cas = case.Case('6850872635911328292') #representation = wordVecSrl(cases) """ for cas in cases: print getRepresentation(cas, log=True) """
# NOTE(review): whitespace-mangled fragment cut at BOTH edges.  The opening
# `if 'A0' in clause ...` counts are the tail of an unseen function that counts
# how often `word` appears as an A0 (agent) or V (verb) SRL role in two clause
# collections, fits a line through the two counts with np.polyfit, and returns
# findShortChains() — the function header, `word`, `dm`, `m`, `model`, and
# `findShortChains` are all defined outside this view, so the indentation depth
# cannot be reconstructed safely from here.
# The trailing `__main__` block runs a semi-supervised labeling experiment
# (learning.findLabels / labelCases over saved cases, last unlabeled case held
# out as test) and is itself cut off mid-loop: the final
# `for person, summarySentences in d.iteritems():` has no body here.
# NOTE(review): Python 2 only (`print` statements, `iteritems`, list-returning
# `filter`); `unlabeledCases[:-1]` / `[-1]` will raise IndexError if every case
# is labeled — confirm that cannot happen upstream.
if 'A0' in clause and clause['A0'] == word: polyfitInput[0] += 1 if 'V' in clause and clause['V'] == word: polyfitInput[0] += 1 for clause in dm: if 'A0' in clause and clause['A0'] == word: polyfitInput[1] += 1 if 'V' in clause and clause['V'] == word: polyfitInput[1] += 1 print polyfitInput model[i] = np.polyfit([1, m], polyfitInput, 1) print model return findShortChains() if __name__ == "__main__": cases = data.getAllSavedCases() labeledTraining = learning.findLabels(cases) learning.readLabels(labeledTraining) unlabeledCases = filter(lambda x:x not in labeledTraining, cases) unlabeledTraining = unlabeledCases[:-1] testing = [unlabeledCases[-1]] learning.labelCases(labeledTraining, unlabeledTraining, testing, numIterations=20) for cas in testing: print 'case ' + str(cas.name) d = {} for person, summarySentences in cas.summary.iteritems(): personSentences = [] for sentence in summarySentences: personSentences.append(cas.sentences[sentence]) d[person] = personSentences for person, summarySentences in d.iteritems():
import data
from case import Case
import random

if __name__ == "__main__":
    # Endless crawler: repeatedly pick a random already-saved case and fetch
    # the cases it links to ("about" cases), constructing a Case object for
    # each link.  Runs until interrupted (Ctrl-C) — there is deliberately no
    # exit condition.
    # senna=False / makeString=False: load cases cheaply, no SRL parsing.
    cases = data.getAllSavedCases(makeString=False, senna=False)
    while True:
        cas = random.choice(cases)
        moreLinks = data.getAboutCases(cas)
        # print(x) with a single argument behaves identically on Python 2
        # (parenthesized expression) and Python 3 (function call).
        print(moreLinks)
        for i, link in enumerate(moreLinks):
            print(i)
            # The constructed Case is never used afterwards — presumably
            # Case.__init__ fetches and saves the case as a side effect.
            # TODO(review): confirm, otherwise this line does nothing.
            c = Case(link, senna=False)
def srlLSI(cases, log=True, save=False):
    """Train a 15-topic LSI model over SRL-tagged sentences.

    Each sentence in ``cas.srlSentences`` is a list of clause dicts mapping
    SRL role -> text.  Every (role, text) pair is flattened into a single
    string token, so one "document" per sentence; an LSI model is then
    fitted over the bag-of-words corpus of those tokens.

    Parameters:
        cases: iterable of case objects exposing ``srlSentences``
            (list of sentences, each a list of {role: text} dicts).
        log: if True, enable gensim's INFO-level progress logging.
        save: if True, persist the model to 'models/lda/srlLSI'.

    Returns:
        The trained gensim LsiModel (previously this function returned
        None; callers that ignored the return value are unaffected).
    """
    if log:
        import logging
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                            level=logging.INFO)
    from gensim import corpora, models

    texts = []
    for cas in cases:
        for srlSentence in cas.srlSentences:
            sentence = []
            for clause in srlSentence:
                # .items() instead of the Python-2-only .iteritems(): same
                # semantics on Python 2, and valid on Python 3 as well.
                for role, text in clause.items():
                    # Token is the stringified (role, text) pair, e.g.
                    # "('A0', 'the court')".
                    sentence.append(str((role, text)))
            texts.append(sentence)

    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    lsi = models.lsimodel.LsiModel(corpus, id2word=dictionary, num_topics=15)
    if save:
        lsi.save('models/lda/srlLSI')
    return lsi

if __name__ == "__main__":
    # NOTE(review): `data` is used here but no import of it is visible in this
    # chunk, and `LSIWordVec` is not defined anywhere in the visible file —
    # both presumably live elsewhere in the original module; verify.
    cases = data.getAllSavedCases(senna=True)
    LSIWordVec()
    #srlLSI(cases, save=True)
    #wordVecSrlBigrams(cases)
    #cas = case.Case('6850872635911328292')
    #representation = wordVecSrl(cases)
    """
    for cas in cases:
        print getRepresentation(cas, log=True)
    """
# NOTE(review): whitespace-mangled fragment cut at BOTH edges, and a
# near-duplicate of another chunk in this file (same np.polyfit role-counting
# tail plus the same semi-supervised labeling `__main__` block) — presumably a
# double paste; confirm against version control and remove one copy.
# The opening conditionals belong to an unseen enclosing function (`word`,
# `dm`, `m`, `model`, `polyfitInput`, `findShortChains` are all defined outside
# this view), and the chunk ends mid-loop: the final
# `for sentence in summarySentences:` has no body here, so indentation cannot
# be reconstructed safely.
# NOTE(review): Python 2 only (`print` statements, `iteritems`,
# list-returning `filter`).
if 'V' in clause and clause['V'] == word: polyfitInput[0] += 1 for clause in dm: if 'A0' in clause and clause['A0'] == word: polyfitInput[1] += 1 if 'V' in clause and clause['V'] == word: polyfitInput[1] += 1 print polyfitInput model[i] = np.polyfit([1, m], polyfitInput, 1) print model return findShortChains() if __name__ == "__main__": cases = data.getAllSavedCases() labeledTraining = learning.findLabels(cases) learning.readLabels(labeledTraining) unlabeledCases = filter(lambda x: x not in labeledTraining, cases) unlabeledTraining = unlabeledCases[:-1] testing = [unlabeledCases[-1]] learning.labelCases(labeledTraining, unlabeledTraining, testing, numIterations=20) for cas in testing: print 'case ' + str(cas.name) d = {} for person, summarySentences in cas.summary.iteritems(): personSentences = [] for sentence in summarySentences: