Example #1
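A claim-review extractor for checkyourfact.com: it reads the title, the date (from the URL path), the author, the article body, related links, tags, and the "Verdict" rating from a parsed article page.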
    def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup,
                                 url: str) -> List[Claim]:
        claim = Claim()
        claim.set_url(url)
        claim.set_source("checkyourfact")

        # title
        title = parsed_claim_review_page.find('article').find("h1")
        claim.set_title(title.text.replace("FACT CHECK: ", ""))

        url_date = url.replace("https://checkyourfact.com/",
                               "").replace("/", " ").split(" ")
        claim.set_date(url_date[0] + "-" + url_date[1] + "-" + url_date[2])

        # author & author_url
        for author in parsed_claim_review_page.select(
                'detail > article > author'):
            # Tag.has_attr checks the HTML attribute; Python's built-in
            # hasattr only sees Tag object attributes and never matches
            # "data-slug".
            if author.has_attr("data-slug"):
                author_str = author.text.split("|")[0].strip().split("\n")[0]
                claim.author = author_str
                claim.author_url = ("https://checkyourfact.com/author/" +
                                    author['data-slug'])
                break

        # body
        body = parsed_claim_review_page.find("article")
        claim.set_body(body.get_text())

        # related links
        div_tag = parsed_claim_review_page.find("article")
        related_links = []
        for link in div_tag.findAll('a', href=True):
            related_links.append(link['href'])
        claim.set_refered_links(related_links)

        claim.set_claim(claim.title)

        # rating
        rating = find_by_text(parsed_claim_review_page, "Verdict", "span")
        if rating:
            rating_text = rating[0].text.split(":")[-1].strip()
            claim.set_rating(rating_text)

        tags = []

        for tag in parsed_claim_review_page.findAll(
                'meta', {"property": "article:tag"}):
            tags.append(tag["content"])
        claim.set_tags(", ".join(tags))
        # drop the claim entirely when no rating could be extracted
        if len(claim.rating) == 0:
            return []
        return [claim]
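For context, a minimal driver sketch showing how an extractor like this is typically invoked. The `CheckYourFactExtractor` wrapper class and the example URL are assumptions for illustration; only `extract_claim_and_review` comes from the code above:

# Minimal driver sketch. CheckYourFactExtractor is a hypothetical wrapper
# class exposing the extract_claim_and_review method shown above; the URL
# is illustrative only.
import requests
from bs4 import BeautifulSoup

url = "https://checkyourfact.com/2020/01/01/fact-check-example/"
page = BeautifulSoup(requests.get(url).text, "lxml")

extractor = CheckYourFactExtractor()
for claim in extractor.extract_claim_and_review(page, url):
    print(claim.title, claim.rating)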
Example #2
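A snopes.com extractor: besides title, authors, published/updated dates, claim text, and rating, it also reads the "What's True" / "What's False" / "What's Undetermined" boxes used for mixed verdicts (currently collected but not stored on the Claim).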
    def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup,
                                 url: str) -> List[Claim]:
        if url in url_blacklist:
            return []
        claim = Claim()

        # url
        claim.url = str(url)

        # source
        claim.source = "snopes"

        # title
        title = None
        if parsed_claim_review_page.select('article > header > h1'):
            for tmp in parsed_claim_review_page.select(
                    'article > header > h1'):
                title = tmp.text.strip()
            #sub_title = parsed_claim_review_page.select( 'article > header > h2' )
            claim.title = str(title.strip())

        # author
        author_list = []
        author_links = []
        if parsed_claim_review_page.select(
                'article > header > ul.list-unstyled.authors.list-unstyled.d-flex.flex-wrap.comma-separated > li > a'
        ):
            for author_a in parsed_claim_review_page.select(
                    'article > header > ul.list-unstyled.authors.list-unstyled.d-flex.flex-wrap.comma-separated > li > a'
            ):
                if author_a.has_attr('href'):
                    author_list.append(author_a.text.strip())
                    author_links.append(author_a.attrs['href'])
                else:
                    print("no author?")

        claim.author = ", ".join(author_list)
        claim.author_url = (", ".join(author_links))

        # review_author ?
        # -

        # date
        datePub = None
        dateUpd = None
        date_str = ""
        date_ = parsed_claim_review_page.find('ul', {"class": "dates"})

        if date_:
            dates = date_.find('li', {"class": "font-weight-bold text-muted"})
            dateSpans = dates.span
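            # the span's children alternate a label ("Published"/"Updated")
            # with the date string that follows it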
            for dateItems in dateSpans:
                if dateItems == 'Published':
                    datePub = dateItems.next.strip()
                    date_str = dateparser.parse(datePub).strftime("%Y-%m-%d")
                    claim.date_published = date_str
                    claim.date = date_str
                if dateItems == 'Updated':
                    dateUpd = dateItems.next.strip()
                    date_str = dateparser.parse(dateUpd).strftime("%Y-%m-%d")
                    claim.date = date_str

        # claim image?
        # -

        # claim
        claim_text = None
        if parsed_claim_review_page.select(
                'article > div > div.claim-text.card-body'):
            # keep the text of the last matching claim box
            for p in parsed_claim_review_page.select(
                    'article > div > div.claim-text.card-body'):
                claim_text = p.text.strip()
            claim.claim = str(claim_text).strip()

        # rating -> https://www.snopes.com/fact-check-ratings/
        rating = None
        if parsed_claim_review_page.select(
                'article > div > div > div > div.media-body > span'):
            for rating_span in parsed_claim_review_page.select(
                    'article > div > div > div > div.media-body > span'):
                rating = rating_span.text.strip()
            claim.rating = str(rating).replace('"', "").strip()
        # claim.set_rating_value( rating )

        # rating best
        whats_true = None
        if parsed_claim_review_page.select(
                'article > div > div > div.whats-true > div > p'):
            for rating_span_true in parsed_claim_review_page.select(
                    'article > div > div > div.whats-true > div > p'):
                whats_true = rating_span_true.text.strip()
            if whats_true:
                whats_true = str(whats_true).replace('"', "")
                # Text: (not Numerical value)
                # claim.best_rating = whats_true

        # rating worst
        whats_false = None
        if parsed_claim_review_page.select(
                'article > div > div > div.whats-false > div > p'):
            for rating_span_false in parsed_claim_review_page.select(
                    'article > div > div > div.whats-false > div > p'):
                whats_false = rating_span_false.text.strip()
            if whats_false:
                whats_false = str(whats_false).replace('"', "")
                # Text: (not Numerical value)
                # claim.worst_rating = whats_false

        # rating Undetermined?
        whats_undetermined = None
        if parsed_claim_review_page.select(
                'article > div > div > div.whats-undetermined > div > p'):
            for rating_span_undetermined in parsed_claim_review_page.select(
                    'article > div > div > div.whats-undetermined > div > p'):
                whats_undetermined = rating_span_undetermined.text.strip()
            if whats_undetermined:
                whats_undetermined = str(whats_undetermined).replace('"', "")
                # Text: (not Numerical value)
                # claim.whats_undetermined = whats_undetermined

        # rating value ?
        # -

        # Body description
        text = ""
        if parsed_claim_review_page.select(
                'article > div.single-body.card.card-body.rich-text > p'):
            for child in parsed_claim_review_page.select(
                    'article > div.single-body.card.card-body.rich-text > p'):
                text += " " + child.text
            body_description = text.strip()
            claim.body = str(body_description).strip()

        # related links
        related_links = []
        if parsed_claim_review_page.select(
                'article > div.single-body.card.card-body > p > a'):
            for link in parsed_claim_review_page.select(
                    'article > div.single-body.card.card-body > p > a'):
                if link.has_attr('href'):
                    related_links.append(link['href'])
            claim.referred_links = related_links

        # tags
        tags = []
        if parsed_claim_review_page.select(
                'article > footer > div > a > div > div'):
            for tag in parsed_claim_review_page.select(
                    'article > footer > div > a > div > div'):
                tags.append(tag.text.strip())
            claim.tags = ", ".join(tags)

        # same as ?
        # -

        #  No Rating? No Claim?
        if not claim_text or not rating:
            print(url)
            if not rating:
                print("-> Rating cannot be found!")
            if not claim_text:
                print("-> Claim cannot be found!")
            return []

        return [claim]
Example #3
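A fullfact.org extractor. Full Fact articles can review several local claims at once: claim/verdict pairs are read from an alternating sequence of paragraphs, and one Claim object is emitted per pair.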
    def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup,
                                 url: str) -> List[Claim]:
        claims = []
        claim = Claim()

        # url
        claim.url = str(url)

        # source
        claim.source = "fullfact"

        # title
        title = None
        if parsed_claim_review_page.select(
                'body > main > div > div > section > article > h1'):
            for tmp in parsed_claim_review_page.select(
                    'body > main > div > div > section > article > h1'):
                title = tmp.text.strip()
            claim.title = str(title.strip())

        # author
        author_list = []
        author_links = []
        # single author?
        if parsed_claim_review_page.select(
                'article > section.social-media > div > div > ul > li > span > cite'
        ):
            for author_a in parsed_claim_review_page.select(
                    'article > section.social-media > div > div > ul > li > span > cite'
            ):
                if author_a.text:
                    author_list.append(author_a.text.strip())
                else:
                    print("no author? https://fullfact.org/about/our-team/")

        claim.author = ", ".join(author_list)
        #claim.author_url = ( ", ".join( author_links ) )

        # date
        datePub = None
        dateUpd = None
        date_str = ""
        # updated?
        if parsed_claim_review_page.select('article > div.published-at'):
            for date_ in parsed_claim_review_page.select(
                    'article > div.published-at'):
                datePub = date_.text.strip()
                # strip any "| fact checked by ..." suffix
                if "|" in datePub:
                    datePub = datePub.split("|")[0].strip()
                date_str = dateparser.parse(datePub).strftime("%Y-%m-%d")
                claim.date_published = date_str
                claim.date = date_str

        # Body description
        text = ""
        if parsed_claim_review_page.select('article > p'):
            for child in parsed_claim_review_page.select('article > p'):
                text += " " + child.text
            body_description = text.strip()
            claim.body = str(body_description).strip()

        # related links (in page body text <p>)
        related_links = []
        if parsed_claim_review_page.select('article > p > a'):
            for link in parsed_claim_review_page.select('article > p > a'):
                try:
                    if hasattr(link, 'href'):
                        if 'http' in link['href']:
                            related_links.append(link['href'])
                        else:
                            related_links.append("https://fullfact.org" +
                                                 link['href'])
                except KeyError as e:
                    print("->KeyError: " + str(e))
                    continue
                except IndexError as e:
                    print("->IndexError : " + str(e))
                    continue

        # related links (in Related fact checks)
        if parsed_claim_review_page.select(
                'section.related-factchecks > div > ul > li > a'):
            for link in parsed_claim_review_page.select(
                    'section.related-factchecks > div > ul > li > a'):
                try:
                    if hasattr(link, 'href'):
                        if 'http' in link['href']:
                            related_links.append(link['href'])
                        else:
                            related_links.append("https://fullfact.org" +
                                                 link['href'])
                except KeyError as e:
                    print("->KeyError: " + str(e))
                    continue
                except IndexError as e:
                    print("->IndexError: " + str(e))
                    continue

        if related_links:
            claim.referred_links = related_links

        # cannot be found on fullfact:
        # self.tags = ""
        # self.author_url = ""
        # self.date_published = ""
        # self.same_as = ""
        # self.rating_value = ""
        # self.worst_rating = ""
        # self.best_rating = ""
        # self.review_author = ""

        # claim # multiple (local) claims: 'article > div > div > div.row.no-gutters.card-body-text > div > div > p' ?
        claim_text_list = []
        claim_text = None
        # rating -> VERDICT: extract_conclusion -> true, false, ...
        claim_verdict_list = []
        claim_verdict = None

        column = "claim"  # or verdict:
        if parsed_claim_review_page.select(
                'body > main > div > div > section > article > div > div > div.row.no-gutters.card-body-text > div > div > p'
        ):
            for p in parsed_claim_review_page.select(
                    'body > main > div > div > section > article > div > div > div.row.no-gutters.card-body-text > div > div > p'
            ):
                # paragraphs alternate: claim, verdict, claim, verdict, ...
                if column == "claim":
                    claim_text_list.append(p.text.strip())
                    if claim_text is None:
                        claim_text = p.text.strip()
                    column = "verdict"
                else:
                    rating_word_list = p.text
                    conclusion_text = self._conclusion_processor.extract_conclusion(
                        rating_word_list)
                    rating = str(conclusion_text).replace('"', "").strip()
                    # keep only the first sentence of the verdict
                    if "." in rating:
                        rating = rating.split(".")[0]
                    claim_verdict_list.append(rating)
                    if claim_verdict is None:
                        claim_verdict = rating
                    column = "claim"

            # First local claim and rating:
            claim.claim = claim_text
            claim.rating = claim_verdict

            # All claims and ratings comma-separated (get all claims?):
            # claim.claim = ", ".join(claim_text_list)
            # claim.rating = ", ".join(claim_verdict_list)

            # Create one Claim per local claim/verdict pair. Each entry needs
            # its own deep copy: appending the same object repeatedly would
            # leave every list element aliasing a single Claim.
            import copy
            for c in range(min(len(claim_text_list), len(claim_verdict_list))):
                local_claim = copy.deepcopy(claim)
                local_claim.claim = claim_text_list[c]
                local_claim.rating = claim_verdict_list[c]
                claims.append(local_claim)

        # No Rating? No Claim?
        if not claim.claim or not claim.rating:
            print(url)
            if not claim.rating:
                print("-> Rating cannot be found!")
            if not claim.claim:
                print("-> Claim cannot be found!")
            return []

        return claims
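Note the deep copies in the multi-claim loop: appending the same Claim object repeatedly would leave every list entry aliasing one object, so each claim/verdict pair gets its own copy before being customized.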
Example #4
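A newtral.es extractor. The claim author and the quoted claim are split out of titles shaped like "Author: «quote»" or "Author: “quote”", and the rating is recovered by scanning the intro (or, failing that, the body) for a fixed list of Spanish verdict keywords.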
    def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup,
                                 url: str) -> List[Claim]:
        claim = Claim()
        claim.set_url(url)
        claim.set_source("newtral")
        # title, claim, and claim author
        title = parsed_claim_review_page.find("meta",
                                              attrs={'property':
                                                     'og:title'})['content']
        title = title.strip().split("|")[0]
        claim.set_title(title)
        entry_content = parsed_claim_review_page.find(
            "div", attrs={'class': 'entry-content'})
        # Titles of the form "Author: «quote»" or "Author: “quote”" embed the
        # claim author and the claim text.
        dospunto = re.search(r'(: «)', title)
        dospunt = re.search(r'(: “)', title)

        if dospunto:
            claim_a = title.split(":")
            claim.author = claim_a[0].strip()
            claim.claim = claim_a[1].strip("« »")
        elif dospunt:
            claim_b = title.split(":")
            claim.author = claim_b[0].strip()
            claim.claim = claim_b[1].strip(": “ ”")
        # multiple titles/claims (one <h2> per local claim)
        claim_mult = entry_content.findAll('h2')

        if claim_mult:
            claim_al = [i.text.strip() for i in claim_mult]
            # re.search expects a string, not a list: scan the joined titles
            joined_al = " ".join(claim_al)
            dospunt = re.search(r'(: “)', joined_al)
            dospunto = re.search(r'(: «)', joined_al)
            if dospunt:
                claim_b = title.split(":")
                claim.author = claim_b[0].strip()
                claim.claim = claim_b[1].strip(": “ ”")
            elif dospunto:
                claim_a = title.split(":")
                claim.author = claim_a[0].strip()
                claim.claim = claim_a[1].strip("« »")
            else:
                claim.set_title(", ".join(claim_al))

        # tags
        tags = parsed_claim_review_page.find_all(
            "meta", attrs={'property': 'article:tag'})
        tag_list = []
        for tag in tags:
            tag_text = tag['content']
            tag_list.append(tag_text)
        claim.set_tags(",".join(tag_list))

        # date published
        published = parsed_claim_review_page.find(
            "meta", attrs={'property': 'article:published_time'})['content']
        claim.date_published = published.strip()

        # article author
        author_span = parsed_claim_review_page.find(
            "span", attrs={'class': 'c-article__author'})
        author_a = author_span.find("a")
        author_url = author_a['href']
        author_text = author_a.text
        author_text = re.sub('Por', '', author_text).strip()
        claim.author_url = author_url
        claim.review_author = author_text

        # Retrieve the article body text

        entry_text = ""
        body_t = entry_content.find_all('p')
        body = [text.text.strip() for text in body_t]
        entry_text += " ".join(body) + "\n"
        claim.body = entry_text

        # Retrieve the links from the article body
        links = [
            link['href'] for link in entry_content.find_all('a', href=True)
        ]
        claim.referred_links = links

        # Veracity (rating keywords)
        intro = parsed_claim_review_page.find(
            "div", attrs={'class': 'c-article__intro'})

        veracities = [
            "ENGAÑOSA", "ENGAÑOSO", "FALSO", "FALSA", "FALSOS", "VERDADERO",
            "VERDAD A MEDIAS"
        ]

        def common(a, b):
            # keep the order of a; `in` is a substring test when b is a string
            return [value for value in a if value in b]

        if intro:
            rating_source = " ".join(str(v) for v in intro)
        else:
            rating_source = " ".join(str(v) for v in body)
        rating_text = [
            i.strip() for i in common(veracities, rating_source.upper())
        ]
        claim.alternate_name = rating_text

        return [claim]
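One caveat on the keyword scan above: `value in b` is a substring test, so "FALSO" also fires inside "FALSOS" and both end up in the rating list. A word-boundary variant (a sketch, not part of the original extractor) would avoid the double match:

import re

def common_words(veracities, text):
    # match whole keywords only, so "FALSO" does not also fire inside "FALSOS"
    return [v for v in veracities
            if re.search(r"\b" + re.escape(v) + r"\b", text)]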
Example #5
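A second newtral.es extractor. Instead of keyword scanning, it parses the rating out of the intro paragraph with a regex over uppercase runs, falling back to <b>/<strong> text when the pattern does not match, and flags pages whose rating only appears in an image.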
    def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url: str) -> List[Claim]:
        claim = Claim()
        claim.set_url(url)
        claim.set_source("newtral")

        title = parsed_claim_review_page.find("meta", attrs={'property': 'og:title'})['content']
        title = title.strip().split("|")[0]
        claim.set_title(title)

        # Titles of the form "Author: «quote»" or "Author: “quote”" embed the
        # claim author and the claim text.
        dospunto = re.search(r'(: «)', title)
        dospunt = re.search(r'(: “)', title)

        if dospunto:
            claim_a = title.split(":")
            claim.author = claim_a[0].strip()
            claim.claim = claim_a[1].strip("« »")
        elif dospunt:
            claim_b = title.split(":")
            claim.author = claim_b[0].strip()
            claim.claim = claim_b[1].strip(": “ ”")

        tags = parsed_claim_review_page.find_all("meta", attrs={'property': 'article:tag'})
        tag_list = []
        for tag in tags:
            tag_text = tag['content']
            tag_list.append(tag_text)
        claim.set_tags(",".join(tag_list))

        published = parsed_claim_review_page.find("meta", attrs={'property': 'article:published_time'})[
            'content']
        claim.date_published = published.strip()

        entry_content = parsed_claim_review_page.find("div", attrs={'class': 'entry-content'})

        intro = parsed_claim_review_page.find("div", attrs={'class': 'c-article__intro'})
        if intro is None:
            intro_rating_p = entry_content.find("em")
            if intro_rating_p is None:
                intro_rating_p = entry_content.find("p")
            if intro_rating_p is None:
                intro_rating_p = entry_content.find("div")
        else:
            intro_rating_p = intro.p
        rating_in_image = False
        if intro_rating_p is None:  # Rating in image...
            rating_in_image = True
            rating_text = ""
        else:
            rating_text = intro_rating_p.get_text()

        # Optional Spanish lead-in phrase, then a run of uppercase letters
        # (the verdict, captured by group(2)), then a terminator.
        rating_re_es_falso = regex.compile(
            r"(La afirmación es|La afirmación es una|La declaración es|Es|El dato es"
            r"|La comparación de Colau es)? ?([\p{Lu}| ]+)(\.| –|,| )")

        es_falso_match = rating_re_es_falso.match(rating_text)
        if es_falso_match is not None and es_falso_match.group(2) is not None:
            rating_text = es_falso_match.group(2)
        elif not rating_in_image:
            # fall back to bold/strong text inside the intro paragraph
            is_there_b = intro_rating_p.find('b')
            if is_there_b is not None:
                rating_text = is_there_b.text
            else:
                is_there_strong = intro_rating_p.find("strong")
                if is_there_strong is not None:
                    rating_text = is_there_strong.text

        claim.rating = rating_text

        author_span = parsed_claim_review_page.find("span", attrs={'class': 'c-article__author'})
        author_a = author_span.find("a")
        author_url = author_a['href']
        author_text = author_a.text
        author_text = re.sub('Por', '', author_text).strip()
        claim.author_url = author_url
        claim.review_author = author_text

        # Retrieve the article body text

        entry_text = ""
        body_t = entry_content.find_all('p')
        body = [text.text.strip() for text in body_t]
        entry_text += " ".join(body) + "\n"
        claim.body = entry_text

        # Retrieve the links from the article body
        links = [link['href'] for link in entry_content.find_all('a', href=True)]
        claim.referred_links = links

        return [claim]
Example #6
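An africacheck.org extractor. The rating is looked up in several places in turn: the verdict box's rating CSS class, stand-alone verdict/indicator divs, and finally the hero image filename; invalid ratings are blanked so the claim is dropped.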
    def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup,
                                 url: str) -> List[Claim]:
        local_claims = []
        claim = Claim()
        claim.set_url(url)
        claim.set_source("africacheck")

        # title
        title = parsed_claim_review_page.find("meta", {"property": "og:title"})
        global_title_text = title['content']
        claim.set_title(global_title_text)

        # date
        date = parsed_claim_review_page.find("span", {
            "class": "published"
        }).next
        global_date_str = ""
        if date:
            # global_date_str = search_dates(date['datetime'].split(" ")[0])[0][1].strftime("%Y-%m-%d")
            global_date_str = search_dates(date)[0][1].strftime("%Y-%m-%d")
            claim.set_date(global_date_str)

        # author
        author = parsed_claim_review_page.find("div",
                                               {"class": "author-details"})
        if author:
            claim.set_author(author.get_text())

        # prefer the linked author name/url when present
        for child in parsed_claim_review_page.select(
                'div.author-details > a > h4'):
            claim.author = child.get_text()

        for child in parsed_claim_review_page.select(
                'div.author-details > a'):
            try:
                claim.author_url = child['href']
            except KeyError:
                print("KeyError: Skip")

        # tags
        tags = []

        for tag in parsed_claim_review_page.findAll(
                'meta', {"property": "article:tag"}):
            tags.append(tag["content"])
        claim.set_tags(", ".join(tags))

        # claim
        entry_section = parsed_claim_review_page.find("section",
                                                      {"class", "cell"})
        verdict_box = parsed_claim_review_page.find(
            "div", {"class", "article-details__verdict"})

        if verdict_box and len(
                verdict_box) > 0 and "Verdict" in verdict_box.text:
            report_claim_div = parsed_claim_review_page.find(
                "div", {"class": "field--name-field-claims"})
            if report_claim_div:
                claim.set_claim(report_claim_div.get_text())
            else:
                claim.set_claim(claim.title)

            # rating
            inline_ratings = parsed_claim_review_page.findAll(
                "div", {"class": "rating"})

            if inline_ratings and 'class' in inline_ratings[0].attrs:
                try:
                    # the second class token encodes the verdict,
                    # e.g. "rating rating--false"
                    rating_tmp = inline_ratings[0].attrs['class'][1]
                    claim.rating = rating_tmp.replace('rating--', '').replace(
                        "-", "").capitalize()
                except IndexError:
                    print("IndexError: Skip")
        else:
            # alternative rating (If there is no article--aside box with verdict)
            global_truth_rating = ""
            if parsed_claim_review_page.find("div",
                                             {"class": "verdict-stamp"}):
                global_truth_rating = parsed_claim_review_page.find(
                    "div", {
                        "class": "verdict-stamp"
                    }).get_text()
            if parsed_claim_review_page.find("div", {"class": "verdict"}):
                global_truth_rating = parsed_claim_review_page.find(
                    "div", {
                        "class": "verdict"
                    }).get_text()
            if parsed_claim_review_page.find("div", {"class": "indicator"}):
                global_truth_rating = parsed_claim_review_page.find(
                    "div", {
                        "class": "indicator"
                    }).get_text()
                if parsed_claim_review_page.find("div", {
                        "class": "indicator"
                }).find('span'):
                    global_truth_rating = parsed_claim_review_page.find(
                        "div", {
                            "class": "indicator"
                        }).find('span').get_text()

            # If there is still no rating value, try to extract it from the
            # verdict image filename
            if global_truth_rating == "":
                filename = ""
                if parsed_claim_review_page.select(
                        'div.hero__image > picture'):
                    for child in parsed_claim_review_page.select(
                            'div.hero__image > picture'):
                        # the rating is encoded in the verdict image name
                        try:
                            filename = child.contents[1].attrs['srcset']
                        except (KeyError, IndexError):
                            print("KeyError/IndexError: Skip")

                if (filename != ""):
                    filename_split = filename.split("/")
                    filename_split = filename_split[len(filename_split) -
                                                    1].split(".png")
                    filename_split = filename_split[0].split("_")
                    if len(filename_split) == 1:
                        global_truth_rating = filename_split[0]
                    else:
                        global_truth_rating = filename_split[
                            len(filename_split) - 1]

                claim.set_rating(
                    str(re.sub('[^A-Za-z0-9 -]+', '',
                               global_truth_rating)).lower().strip().replace(
                                   "pfalse", "false").replace("-",
                                                              "").capitalize())

        if not self.rating_value_is_valid(claim.rating):
            print("\nURL: " + url)
            print("\nRating: " + claim.rating)
            claim.rating = ""

        # body
        body = parsed_claim_review_page.find("div", {"class": "article--main"})
        claim.set_body(body.get_text())

        # related links
        related_links = []
        for link in body.findAll('a', href=True):
            related_links.append(link['href'])
        claim.set_refered_links(related_links)

        if claim.rating:
            return [claim]
        else:
            return []