def parseTwitter():
    ArtList = list()
    # Twitter source file
    PATH_FileTWEET = twitter.PATH_FileRes
    articleTWEET = reader(PATH_FileTWEET)
    print("==- Twitter START -==")
    for item in articleTWEET:
        titre = "Tweet de " + item["author"]
        try:
            # Append the first hashtag to the title when there is one
            titre = titre + "-" + item["hashtags"][0]
        except (KeyError, IndexError):
            pass
        auteur = item["author"]
        info_source = item["type"]
        lien = "https://twitter.com/statuses/" + item["id"]
        resume = item["text"]
        try:
            lien_img = item["entries"]["photos"][0]
        except (KeyError, IndexError):
            try:
                lien_img = item["entries"]["videos"][0]
            except (KeyError, IndexError):
                lien_img = None
        date = int(item["time"][:10])
        module_source = "Twitter"
        ArtList.append(
            Article(hash(titre), titre, auteur, info_source, lien, resume,
                    lien_img, date, module_source))
    print("===- Twitter OK -===")
    return ArtList
def readArticle(ID):
    """
    Fetch an article from the database.

    ID -> ID of the article
    return -> the requested Article, or -1 if it does not exist
    """
    # 1st: look in the in-memory global list
    if globalArtList is not None:
        for item in globalArtList:
            if item.ID == ID:
                return item
    # 2nd: look in the database
    artDB = TinyDB("mainCol.json")
    A = Article()
    DictArt = artDB.search(Query().ID == ID)
    if DictArt == []:
        artDB.close()
        return -1
    for key in DictArt[0]:
        setattr(A, key, DictArt[0][key])
    artDB.close()
    return A
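# Hypothetical usage sketch for readArticle (the helper name and the ID value
# are assumptions for illustration; only the .ID attribute is taken from the
# code above):
def demoReadArticle(some_id):
    art = readArticle(some_id)
    if art == -1:
        print("No article with ID", some_id)
    else:
        print("Found article", art.ID)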
def parseReddit():
    ArtList = list()
    # Reddit source file
    PATH_FileREDDIT = reddit.PATH_FileRes
    # Read the non-formatted file
    articleREDDIT = reader(PATH_FileREDDIT)
    print("==- Reddit START -==")
    for i in articleREDDIT:
        for item in articleREDDIT[i]:
            titre = item["title"]
            try:
                auteur = item["author"]
            except KeyError:
                auteur = None
            info_source = item["tags"][0]["label"]
            lien = item["link"]
            resume = withoutHTML(item["summary"])
            lien_img = None
            date = u.convert_time(item["updated"])
            module_source = item["from"]
            ArtList.append(
                Article(hash(titre), titre, auteur, info_source, lien, resume,
                        lien_img, date, module_source))
    print("===- Reddit OK -===")
    return ArtList
def parseFeed():
    ArtList = list()
    # Feed source file
    PATH_FileFEED = feed.PATH_FileRes
    # Read the non-formatted file
    articleFEED = reader(PATH_FileFEED)
    print("===- Feed START -===")
    for i in articleFEED:
        for item in articleFEED[i]:
            titre = withoutHTML(item["title"])
            try:
                auteur = item["author"]
            except KeyError:
                auteur = None
            info_source = item["source"]
            lien = item["link"]
            try:
                resume = withoutHTML(item["summary"])
            except KeyError:
                resume = None
            try:
                lien_img = item["links"][1]["href"]
            except (KeyError, IndexError):
                lien_img = None
            date = item["published"]
            module_source = item["from"]
            ArtList.append(
                Article(hash(titre), titre, auteur, info_source, lien, resume,
                        lien_img, date, module_source))
    print("===- Feed OK -===")
    return ArtList
def get_article(self) -> List[Article]:
    try:
        with self._get_connection() as conn:
            cur = conn.cursor()
            cur.execute(self.QUERY_ARTICLE_GET)
            raw_articles = cur.fetchall()
            return list(map(lambda a: Article(*a), raw_articles))
    except Exception:
        logger.exception("Failed to get articles")
        raise
def _parse(self, content: str) -> List[Article]:
    soup = BeautifulSoup(content, "html.parser")
    titles = soup.find_all("a", "storylink")
    return [
        Article(
            header=x.text,
            author="Anonymous",
            created="1970-01-01",
            points=1,
            link=x["href"],
            num_comments=0,
        )
        for x in titles
    ]
def readAllArticles():
    """
    Return a list with all articles.
    """
    artDB = TinyDB("mainCol.json")
    listDict = artDB.all()
    res = list()
    for art in listDict:
        A = Article()
        for key in art:
            setattr(A, key, art[key])
        res.append(A)
    artDB.close()
    return res
def parseNAC():
    ArtList = list()
    # NewsAPI source file
    PATH_FileNA = napi.PATH_FileRes
    # Read the non-formatted file
    articleNA = reader(PATH_FileNA)
    print("===- News API START -===")
    for item in articleNA["articles"]:
        titre = item["title"]
        auteur = item["author"]
        info_source = item["source"]["name"]
        lien = item["url"]
        resume = item["content"]
        lien_img = item["urlToImage"]
        date = u.convert_time(item["publishedAt"])
        module_source = item["from"]
        ArtList.append(
            Article(hash(titre), titre, auteur, info_source, lien, resume,
                    lien_img, date, module_source))
    print("===- News Api OK -===")
    return ArtList
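# Hypothetical aggregation sketch (collectAllSources is an assumed helper, not
# part of the code above): every parse* function returns a list of Article
# objects, so their results can simply be concatenated.
def collectAllSources():
    allArticles = list()
    for parser in (parseTwitter, parseReddit, parseFeed, parseNAC):
        try:
            allArticles.extend(parser())
        except Exception as e:
            # Skip a source whose file is missing or malformed instead of
            # aborting the whole run
            print("Source failed:", parser.__name__, e)
    return allArticles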