コード例 #1
0
ファイル: search.py プロジェクト: Zarmakuizz/articleanalysis
def getPaperByWords(words, docNumber = 10):
    '''Get the articles matching a list of query words, best matches first.

    Every article is checked against every word: the MapReduce entity
    stored for the (article, word) pair supplies that word's occurrence
    count. Articles whose counts add up to zero are left out.

    :param words: list of words for the query.
    :param docNumber: maximum number of articles to return.
    :returns: an OrderedDict of {article name: [count for each word]},
        the counts being in the same order as ``words`` and the articles
        sorted by decreasing sum of their counts.
        example of use: data["Article"] = [42, 2]
    '''
    if not words:
        # Without any query word no article can match, so there is no need
        # to scan the articles at all.
        return OrderedDict()

    data = {}
    for oneArticle in Article.all():
        nbOccur = []
        for strWord in words:
            mapReduce = MapReduce.all()
            mapReduce.filter('keyArticle = ', oneArticle)
            mapReduce.filter('keyWord =', strWord)
            # get() yields None when nothing matches, which makes a separate
            # count() query (one more datastore round trip) unnecessary.
            mR = mapReduce.get()
            nbOccur.append(mR.count if mR is not None else 0)
        if sum(nbOccur) != 0:
            data[oneArticle.name] = nbOccur

    # Sort the results based on the sum of each word's occurrences.
    # sorted() is used instead of items().sort() so that this also works
    # where items() does not return a list.
    sortedList = sorted(data.items(), key=lambda x: sum(x[1]), reverse=True)
    # Honour the documented cap on the number of results.
    return OrderedDict(sortedList[:docNumber])
コード例 #2
0
ファイル: search.py プロジェクト: Zarmakuizz/articleanalysis
def getWordsMostFreqByAuthor(authorName, wordNumber = 10):
    '''Get the most frequent keywords over all the articles of an author.

    The author is looked up by name, then the counts of the MapReduce
    entities attached to each of the author's articles are summed per
    keyword.

    :param authorName: name of the author to look up.
    :param wordNumber: maximum number of keywords to return.
    :returns: an OrderedDict of {keyWord: total count}, sorted by
        decreasing count and holding at most ``wordNumber`` entries. It is
        empty when no author has the given name. Keys are the ``keyWord``
        values of the MapReduce entities (presumably referenced word
        entities rather than plain strings -- confirm against the models).
    '''
    author = Author.all()
    author.filter('name = ', authorName)
    authorEntity = author.get()
    if authorEntity is None:
        # Unknown author: do not go on and filter on a None reference,
        # which would aggregate links that have no author at all.
        return OrderedDict()

    artiAuths = ArtiAuth.all()
    artiAuths.filter('keyAuthor = ', authorEntity)
    # defaultdict(int) starts every keyword at 0, so counts can simply be
    # accumulated with += without a first-occurrence special case.
    data = collections.defaultdict(int)
    for article in artiAuths:
        mapReduces = MapReduce.all()
        mapReduces.filter('keyArticle = ', article.keyArticle)
        for mR in mapReduces:
            try:
                data[mR.keyWord] += mR.count
            except ReferencePropertyResolveError:
                # The referenced word could not be resolved: report it and
                # skip this entry. The parenthesised print behaves the same
                # as a Python 2 statement and as a Python 3 function call.
                print('Pas de reference word')

    # Sort the results based on each word's total number of occurrences.
    sortedList = sorted(data.items(), key=lambda x: x[1], reverse=True)
    # Keep the first wordNumber entries (the former 0:wordNumber-1 slice
    # returned one entry too few).
    return OrderedDict(sortedList[:wordNumber])
コード例 #3
0
def deleteData():
    """
        Delete every stored entity of the Master, MapReduce, Article,
        Author and ArtCitedBib kinds, in that order, one entity at a time.

        NOTE(review): only these five kinds are handled; other kinds used
        by the project (e.g. ArtiAuth) are not deleted here -- confirm that
        this is intended.
    """
    kinds = (Master, MapReduce, Article, Author, ArtCitedBib)
    for kind in kinds:
        for entity in kind.all():
            kind.delete(entity)