def getArticles(articleList):
    """Build one dated-sentence set per article.

    For every entry of ``articleList`` the title (``article[1]``) is chunked
    with ``gc.getChunks`` and tagged with ``tag.getTags``. The text returned
    by ``wp.getArticle`` for the tagged ``subject`` is split into sentences
    with ``sent.getSentences``, and only the sentences for which
    ``hd.hasDate`` returns something other than ``[]`` are kept.

    Args:
        articleList: iterable of indexable records; ``article[0]`` is stored
            under ``'year'`` and ``article[1]`` under ``'title'``.

    Returns:
        A list of dicts with the keys ``'title'``, ``'sentences'`` and
        ``'year'``, one per article that was processed without error.
        Articles with empty tags, and articles whose processing raises, are
        skipped (the latter are logged as warnings).
    """
    # Local import: the file's own import block is not fully visible here.
    import logging

    log = logging.getLogger(__name__)
    singleSets = []
    for article in articleList:
        try:
            title = article[1]
            chunks = gc.getChunks(title)
            tags = tag.getTags(title, chunks)
            if not tags:
                # Nothing was tagged for this title: go to the next article.
                continue

            # The Stanford Open IE tags; only the subject is needed here.
            subject = tags['subject']

            content = wp.getArticle(subject)
            sentences = [
                sentence
                for sentence in sent.getSentences(content)
                if hd.hasDate(sentence) != []
            ]

            # NOTE(review): `listOfYears` is not defined inside this function;
            # presumably it is a module-level list declared elsewhere --
            # confirm. If it is missing, the NameError lands in the handler
            # below and every article is skipped. The collapsed original does
            # not show whether this ran once per article or once per dated
            # sentence; once per article is assumed here.
            listOfYears.append(article[0])

            singleSets.append({
                'title': title,
                'sentences': sentences,
                'year': article[0],
            })
        except Exception as err:
            # Best effort per article: one bad record must not abort the run,
            # but the failure is reported instead of silently swallowed.
            log.warning("getArticles: skipping article %r: %s", article, err)
    return singleSets
import app.parser.articleRetrieval.wikipediaParse as wp
import app.parser.sentences as sent
import app.analytics.sentenceFiltering.actionSentences as action
import app.analytics.functions.hasDate as hd
import app.analytics.functions.synonym as sn
import app.analytics.getFeatures as ft

# Driver script: for the first ten imported articles, tag the title, fetch the
# Wikipedia text for the tagged subject, split it into sentences and compute
# features from (subject, objects, relation, sentences).
#
# NOTE(review): `importArticles`, `gc` and `tag` are not imported in the part
# of the file visible here; presumably they are imported elsewhere -- confirm.
articles = importArticles.getData()
sentences = []
count = 0  # NOTE(review): never updated below; kept as a module-level name.

for article in articles[:10]:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(article)
    chunks = gc.getChunks(article[1])
    tags = tag.getTags(article[1], chunks)
    if not tags:
        # Nothing was tagged for this title: go to the next article.
        continue

    # The Stanford Open IE tags.
    subject = tags['subject']
    relation = tags['relation']
    objects = tags['object'].split()
    print(objects)
    print(relation)
    print(subject)

    # Kept separate from the loop variable so `article` still refers to the
    # record being processed.
    content = wp.getArticle(subject)
    sentences = sent.getSentences(content)
    # NOTE(review): `features` is overwritten on every iteration, so only the
    # last article's value survives the loop -- confirm this is intended.
    features = ft.getFeatures(subject, objects, relation, sentences)