def getPaperByWords(words, docNumber=10):
    """Get the articles matching a query, ranked by total word occurrences.

    :param words: list of words for the query.
    :param docNumber: maximum number of results to return. ``None`` returns
        every matching article; a negative value is treated as 0.
    :returns: an ordered dictionary ``{article name: [count, ...]}`` whose
        counts follow the order of ``words``. Articles are ordered by the
        sum of their counts, highest first, and articles whose counts sum to
        zero are left out.
        Example of use: ``data["Article"] = [42, 2]``
    """
    data = {}
    for oneArticle in Article.all():
        nbOccur = []
        for strWord in words:
            mapReduce = MapReduce.all()
            mapReduce.filter('keyArticle = ', oneArticle)
            mapReduce.filter('keyWord =', strWord)
            # A single get() both tests for a match and fetches it: it yields
            # None when the query has no result, so no separate count() call
            # is needed.
            mR = mapReduce.get()
            nbOccur.append(mR.count if mR is not None else 0)
        if sum(nbOccur) != 0:
            data[oneArticle.name] = nbOccur

    # Rank the results on the sum of each word's occurrences. sorted() is used
    # instead of items().sort() so this also works where items() is a view.
    sortedList = sorted(data.items(), key=lambda x: sum(x[1]), reverse=True)

    # Honour the documented result limit.
    if docNumber is not None:
        sortedList = sortedList[:max(docNumber, 0)]
    return OrderedDict(sortedList)
def saveMapReduce(namefic):
    """Index one PDF article and store the results.

    Converts the PDF file to text, runs the mapper and the reducer over that
    text, hands the text and the new article to ``getReferences``, and saves
    the Author, Article, ArtiAuth, MapReduce and Master entities.

    The author is read from the fifth line of the extracted text and the
    title from the first two lines (concatenated without a separator), so a
    text with fewer than five lines raises ``IndexError`` before anything is
    stored.

    :param namefic: the name of the PDF file
    """
    fic = convert_pdf_to_txt(namefic)
    dataDict = reducer(mapper(fic))

    lines = re.split(r"\n", fic)

    # Anything that is not an ASCII letter or whitespace is replaced by a
    # space in both the author string and the title.
    authorStr = re.sub(r"[^a-zA-Z\s]", " ", lines[4])
    author = Author(name=authorStr)
    author.put()

    titre = re.sub(r"[^a-zA-Z\s]", " ", lines[0] + lines[1]).strip()
    article = Article(name=titre, fileName=namefic)
    article.put()

    getReferences(fic, article)

    # Link the author to the article.
    artiAuth = ArtiAuth(keyAuthor=author, keyArticle=article)
    artiAuth.put()

    for cle, occurrences in dataDict.items():
        # Per-article count for this word.
        mapReduce = MapReduce(keyWord=cle, keyArticle=article,
                              count=occurrences)
        mapReduce.put()

        # Running total for this word in Master. One get() is enough: it
        # yields None when the word has no Master entry yet.
        checkMaster = Master.all()
        checkMaster.filter("keyWord =", cle)
        master = checkMaster.get()
        if master is not None:
            master.count = master.count + occurrences
        else:
            master = Master(keyWord=cle, count=occurrences)
        master.put()
def deleteData():
    """Delete all the data.

    Removes every stored entity of each model: Master, MapReduce, Article,
    Author, ArtCitedBib and ArtiAuth. ArtiAuth is included so that no
    author/article link is left pointing at deleted entities.
    """
    models = (Master, MapReduce, Article, Author, ArtCitedBib, ArtiAuth)
    for model in models:
        for entity in model.all():
            entity.delete()
def getArtCitedFromArt(nameArt):
    """Give the articles cited in an article.

    :param nameArt: name of the article
    :returns: list of names of the cited articles; an empty list when no
        article has the given name
    """
    article = Article.all()
    article.filter('name =', nameArt)
    result = article.get()
    if result is None:
        # Unknown article: without this guard the query below would filter
        # on keyArticle = None and could return unrelated citation records.
        return []

    artCitedBib = ArtCitedBib.all()
    artCitedBib.filter('keyArticle = ', result)
    return [artiCited.nameArticle for artiCited in artCitedBib]