コード例 #1
0
def getArticles(articleList):
    """Build one record of date-bearing sentences per taggable article.

    For every entry of ``articleList`` the value at index 1 is chunked with
    ``gc.getChunks`` and tagged with ``tag.getTags``.  The tagged subject is
    passed to ``wp.getArticle``; the returned content is split with
    ``sent.getSentences`` and only the sentences for which ``hd.hasDate``
    returns something other than an empty list are kept.

    Args:
        articleList: iterable of indexable records.  Index 0 is stored as the
            record's ``'year'`` and index 1 as its ``'title'``.

    Returns:
        A list of dicts with the keys ``'title'``, ``'sentences'`` and
        ``'year'``, one per article that was processed without error.
        Articles with no tags, or whose processing raised, are left out.

    Side effects:
        Appends ``article[0]`` to ``listOfYears`` for every article that is
        kept.  NOTE(review): ``listOfYears`` is not defined in this block -
        confirm it exists at module level.
    """
    # Imported locally so this function does not rely on a module-level
    # import that may not be present in the file.
    import logging

    log = logging.getLogger(__name__)
    singleSets = []
    for article in articleList:
        try:
            title = article[1]
            chunks = gc.getChunks(title)
            tags = tag.getTags(title, chunks)
            if not tags:
                # Nothing was extracted for this title: go to the next article.
                continue

            # Stanford Open IE tags: only the subject is needed here.
            content = wp.getArticle(tags['subject'])
            sentences = [
                sentence
                for sentence in sent.getSentences(content)
                if hd.hasDate(sentence) != []
            ]
            listOfYears.append(article[0])
            singleSets.append(
                {'title': title, 'sentences': sentences, 'year': article[0]}
            )
        except Exception:
            # Best effort: one bad article must not abort the whole run, but
            # the failure is recorded instead of being silently discarded.
            log.exception("getArticles: skipping article %r", article)
    return singleSets
コード例 #2
0
ファイル: index.py プロジェクト: JFriel/honours_project
import app.parser.articleRetrieval.wikipediaParse as wp
import app.parser.sentences as sent
import app.analytics.sentenceFiltering.actionSentences as action
import app.analytics.functions.hasDate as hd
import app.analytics.functions.synonym as sn
import app.analytics.getFeatures as ft

# Load the article records to process.
# NOTE(review): `importArticles`, `gc` and `tag` are used below but are not
# among the imports visible in this part of the file - confirm they are
# imported elsewhere.
articles = importArticles.getData()

# NOTE(review): `sentences` is rebound inside the loop and `count` is not used
# in the lines visible here - confirm whether either is still needed.
sentences= []
count = 0
# Only the first ten entries of `articles` are processed.
for article in articles[0:10]:
    print article
    # article[1] is the value handed to both the chunker and the tagger
    # (presumably the article title - TODO confirm against getData()).
    chunks = gc.getChunks(article[1])
    tags =  tag.getTags(article[1],chunks)
    if tags == []:
        continue # TODO: check this is right; skips to the next iteration when no tags came back
    """The Stanford Open IE tags"""
    subject = tags['subject']
    relation = tags['relation']
    objects = tags['object']
    # The object tag is split on whitespace into a list of tokens.
    objects = objects.split()
    print objects
    print relation
    print subject

    # NOTE: this rebinds the loop variable `article` to whatever
    # wp.getArticle returns for the tagged subject.
    article = wp.getArticle(subject)
    sentences = sent.getSentences(article)

    # `features` is reassigned on every iteration; in the lines visible here
    # it is not read again, so only the last iteration's value remains.
    features= ft.getFeatures(subject, objects, relation, sentences)