def _parseReference(reference):
    """Split one bibliography entry into its authors and its title.

    The entry is cut on commas. Leading segments that look like an author
    (an initial followed by a dot, e.g. "A. Foo", or "and <name>") are
    collected; the first segment that does not look like an author is taken
    as the title and parsing stops there.

    Returns a tuple (authors, title). title is "" when no title segment was
    found or when nothing is left of it after cleaning.
    """
    authors = []
    for segment in reference.split(","):
        segment = segment.strip()
        # The segment is already stripped, so the "and <name>" form must be
        # matched without a leading space (the previous " and " pattern could
        # never match and the last author ended up recorded as the title).
        authorMatch = (re.match(r"([A-Z]\..*)", segment)
                       or re.match(r"and\s+(.*)", segment))
        if authorMatch:
            authors.append(
                re.sub(r"[^a-zA-Z\s\.]", "", authorMatch.group(1)))
            continue
        # First non-author segment: keep what precedes the last dot, then
        # drop everything that is not a letter or whitespace.
        titleMatch = re.match(r"(.*)\..*", segment)
        title = titleMatch.group(1) if titleMatch else segment
        return authors, re.sub(r"[^a-zA-Z\s]", "", title)
    return authors, ""


def _recordCitation(articlePrinc, title, authors):
    """Increment the stored citation count for title, or create the record.

    NOTE(review): assumes ArtCitedBib is an App Engine db.Model, whose
    Query.get() returns None when nothing matches -- confirm.
    """
    query = ArtCitedBib.all()
    query.filter("nameArticle =", title)
    cited = query.get()
    if cited is not None:
        cited.count = cited.count + 1
    else:
        cited = ArtCitedBib(
            keyArticle=articlePrinc,
            nameArticle=title,
            authors=authors,
            count=1
        )
    cited.put()


def getReferences(fic, articlePrinc):
    """Get the references of the article and record each cited work.

    fic is the article text. Everything after the last "REFERENCES" marker
    is split on "[n]" citation markers, and every entry that yields at least
    one author and a non-empty title is stored through ArtCitedBib.

    articlePrinc is passed as keyArticle when a new ArtCitedBib record is
    created.

    Returns None. Prints "pas de references" when the text contains no
    "REFERENCES" marker.
    """
    fic = fic.replace("\n", " ")
    _, marker, bibliography = fic.rpartition("REFERENCES")
    if not marker:
        print("pas de references")
        return
    # The capturing group keeps the "[n]" markers in the result; they clean
    # down to an empty title and are skipped like any other unusable entry.
    for reference in re.split(r"(\[[0-9]+\])", bibliography):
        authors, title = _parseReference(reference)
        if title and authors:
            _recordCitation(articlePrinc, title, authors)