Example #1
def parseTwitter():
    ArtList = list()

    # Twitter source file
    PATH_FileTWEET = twitter.PATH_FileRes

    articleTWEET = reader(PATH_FileTWEET)

    print("==- Twitter START -==")
    for item in articleTWEET:
        titre = "Tweet de " + item["author"]
        try:
            titre = titre + "-" + item["hashtags"][0]
        except (KeyError, IndexError):  # no hashtag on this tweet
            pass
        auteur = item["author"]
        info_source = item["type"]
        lien = "https://twitter.com/statuses/" + item["id"]
        resume = item["text"]
        try:
            lien_img = item["entries"]["photos"][0]
        except (KeyError, IndexError):  # no photo attached
            try:
                lien_img = item["entries"]["videos"][0]
            except (KeyError, IndexError):  # no video either
                lien_img = None
        date = int(item["time"][:10])
        module_source = "Twitter"
        ArtList.append(
            Article(hash(titre), titre, auteur, info_source, lien, resume,
                    lien_img, date, module_source))
    print("===- Twitter OK -===")

    return ArtList
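
The parsers in Examples #1, #3, #4 and #8 all build Article objects with the same nine positional arguments, and Examples #2 and #7 create a bare Article() and fill it with setattr, but the Article class itself is not shown. A minimal sketch of a constructor compatible with those call sites (an assumption, not the original class):

class Article:
    # Hypothetical sketch; the real class is not shown in these examples.
    # Defaults allow the bare Article() used by readArticle/readAllArticles.
    def __init__(self, ID=None, titre=None, auteur=None, info_source=None,
                 lien=None, resume=None, lien_img=None, date=None,
                 module_source=None):
        self.ID = ID                        # hash of the title
        self.titre = titre                  # title
        self.auteur = auteur                # author
        self.info_source = info_source      # source label (hashtag, tag, feed name)
        self.lien = lien                    # link to the original item
        self.resume = resume                # summary text
        self.lien_img = lien_img            # image URL, or None
        self.date = date                    # publication date / timestamp
        self.module_source = module_source  # which parser produced the article

Note that Examples #5 and #6 come from a different project and use a different Article type; see the sketch after Example #6.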
Example #2
def readArticle(ID):
    """
    Take back an article from DB

    ID -> article object
    return -> The Article requested
    """
    # First look in the in-memory global list
    if globalArtList is not None:
        for item in globalArtList:
            if item.ID == ID:
                return item

    # Then fall back to the database
    artDB = TinyDB("mainCol.json")
    A = Article()
    DictArt = artDB.search(Query().ID == ID)
    if DictArt == []:
        artDB.close()
        return -1
    else:
        for key in DictArt[0]:
            setattr(A, key, DictArt[0][key])

    artDB.close()
    return A
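
A hedged usage sketch of readArticle, assuming the ID is the title hash stored by the parsers; keep in mind that Python's str hashes are randomized per process unless PYTHONHASHSEED is fixed, so these IDs are only stable within a single run:

# Usage sketch (assumption: IDs are hash(title), as stored by the parsers).
article_id = hash("Some article title")
art = readArticle(article_id)
if art == -1:
    print("No article with that ID")
else:
    print(art.titre, art.lien)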
Example #3
def parseReddit():
    ArtList = list()

    # Reddit source file
    PATH_FileREDDIT = reddit.PATH_FileRes

    # Read the unformatted source file
    articleREDDIT = reader(PATH_FileREDDIT)

    print("==- Reddit START -==")
    for i in articleREDDIT:
        for item in articleREDDIT[i]:
            titre = item["title"]
            try:
                auteur = item["author"]
            except KeyError:  # no author on this entry
                auteur = None
            info_source = item["tags"][0]["label"]
            lien = item["link"]
            resume = withoutHTML(item["summary"])
            lien_img = None
            date = u.convert_time(item["updated"])
            module_source = item["from"]
            ArtList.append(
                Article(hash(titre), titre, auteur, info_source, lien, resume,
                        lien_img, date, module_source))
    print("===- Reddit OK -===")

    return ArtList
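
u.convert_time is called here and in Example #8 but is not shown. Since the Twitter parser stores an integer Unix timestamp, a plausible (assumed) implementation normalizes an ISO 8601 date string to the same representation:

from datetime import datetime, timezone

def convert_time(date_str):
    # Hypothetical sketch of u.convert_time: parse an ISO 8601 string
    # (Reddit's "updated", NewsAPI's "publishedAt") into a Unix timestamp.
    dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return int(dt.timestamp())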
Example #4
def parseFeed():
    ArtList = list()

    # Feed source file
    PATH_FileFEED = feed.PATH_FileRes

    # Read the unformatted source file
    articleFEED = reader(PATH_FileFEED)

    print("===- Feed START -===")
    for i in articleFEED:
        for item in articleFEED[i]:
            titre = withoutHTML(item["title"])
            try:
                auteur = item["author"]
            except KeyError:  # no author field
                auteur = None
            info_source = item["source"]
            lien = item["link"]
            try:
                resume = withoutHTML(item["summary"])
            except KeyError:  # no summary field
                resume = None
            try:
                lien_img = item["links"][1]["href"]
            except (KeyError, IndexError):  # no secondary link to use as an image
                lien_img = None
            date = item["published"]
            module_source = item["from"]
            ArtList.append(
                Article(hash(titre), titre, auteur, info_source, lien, resume,
                        lien_img, date, module_source))
    print("===- Feed OK -===")

    return ArtList
Example #5
 def get_article(self) -> List[Article]:
     try:
         with self._get_connection() as conn:
             cur = conn.cursor()
             cur.execute(self.QUERY_ARTICLE_GET)
             raw_articles = cur.fetchall()
             return list(map(lambda a: Article(*a), raw_articles))
     except Exception:
         logger.exception("Failed to get articles")
         raise
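
Neither _get_connection nor QUERY_ARTICLE_GET appears in this example. A minimal sketch of how the surrounding repository could be wired up with sqlite3; the class name, the query, and the column order are assumptions (the order only has to match the positional fields of the Article used in Example #6):

import logging
import sqlite3

logger = logging.getLogger(__name__)

class ArticleRepository:
    # Assumed query; Article(*a) requires the column order to match the
    # constructor's positional parameters.
    QUERY_ARTICLE_GET = (
        "SELECT header, author, created, points, link, num_comments FROM article"
    )

    def __init__(self, db_path: str):
        self._db_path = db_path

    def _get_connection(self) -> sqlite3.Connection:
        # sqlite3 connections act as context managers (commit/rollback on exit).
        return sqlite3.connect(self._db_path)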
Example #6
 def _parse(self, content: str) -> List[Article]:
     soup = BeautifulSoup(content, "html.parser")
     titles = soup.find_all("a", "storylink")
     return [
         Article(
             header=x.text,
             author="Anonymous",
             created="1970-01-01",
             points=1,
             link=x["href"],
             num_comments=0,
         ) for x in titles
     ]
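
Examples #5 and #6 come from a different codebase than the parsers above, and their Article takes keyword fields rather than nine positional arguments. A hypothetical dataclass matching the call site (field names come from Example #6; the type itself is an assumption):

from dataclasses import dataclass

@dataclass
class Article:
    # Hypothetical sketch inferred from the keyword arguments in Example #6;
    # this field order also satisfies the positional Article(*a) in Example #5.
    header: str
    author: str
    created: str
    points: int
    link: str
    num_comments: int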
Example #7
def readAllArticles():
    """
    Return a list of all articles stored in the database
    """
    artDB = TinyDB("mainCol.json")

    listDict = artDB.all()

    res = list()

    for art in listDict:
        A = Article()
        for key in art:
            setattr(A, key, art[key])
        res.append(A)

    artDB.close()
    return res
Example #8
def parseNAC():
    ArtList = list()
    # NewsAPI source file
    PATH_FileNA = napi.PATH_FileRes

    # Read the unformatted source file
    articleNA = reader(PATH_FileNA)

    print("===- News API START -===")
    for item in articleNA["articles"]:
        titre = item["title"]
        auteur = item["author"]
        info_source = item["source"]["name"]
        lien = item["url"]
        resume = item["content"]
        lien_img = item["urlToImage"]
        date = u.convert_time(item["publishedAt"])
        module_source = item["from"]
        ArtList.append(
            Article(hash(titre), titre, auteur, info_source, lien, resume,
                    lien_img, date, module_source))
    print("===- News Api OK -===")

    return ArtList
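
The aggregation step that combines these parsers is not shown in the examples. A hedged sketch of how the global list consulted by readArticle might be built:

# Hedged sketch: concatenate the output of every parser into the global list
# that readArticle searches before falling back to TinyDB. The real wiring is
# not shown in these examples.
globalArtList = parseTwitter() + parseReddit() + parseFeed() + parseNAC()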