Beispiel #1
0
def category(soup):
    """Return the JSON-LD ``@type`` declared by the page.

    Looks at the first four ``<script type="application/ld+json">`` elements
    selected from *soup* (the same limit the original index-based scan used)
    and returns the first non-empty ``@type`` value found.

    Args:
        soup: Object exposing ``select(css_selector)`` whose results carry a
            ``.text`` attribute (presumably a BeautifulSoup document --
            confirm against the caller).

    Returns:
        The ``@type`` value as stored in the JSON-LD payload, or the string
        ``'N/A'`` when no inspected script provides one.
    """
    max_scripts = 4
    scripts = soup.select('script[type="application/ld+json"]')
    for script in scripts[:max_scripts]:
        try:
            meta = json.loads(script.text)
        except ValueError:
            # Malformed JSON in one script must not hide the others.
            continue
        # A JSON-LD payload is either a single object or a list of objects.
        entries = meta if isinstance(meta, list) else [meta]
        for entry in entries:
            if isinstance(entry, dict) and entry.get('@type'):
                return entry['@type']
    return 'N/A'
Beispiel #2
0
def date(soup):
    """Return the ``datePublished`` value from the page's JSON-LD metadata.

    Looks at the first four ``<script type="application/ld+json">`` elements
    selected from *soup* (the same limit the original index-based scan used)
    and returns the first non-empty ``datePublished`` value found.

    Args:
        soup: Object exposing ``select(css_selector)`` whose results carry a
            ``.text`` attribute (presumably a BeautifulSoup document --
            confirm against the caller).

    Returns:
        The ``datePublished`` value exactly as stored in the JSON-LD payload
        (no parsing or normalisation is applied), or the string ``'N/A'``
        when no inspected script provides one.
    """
    max_scripts = 4
    scripts = soup.select('script[type="application/ld+json"]')
    for script in scripts[:max_scripts]:
        try:
            meta = json.loads(script.text)
        except ValueError:
            # Malformed JSON in one script must not hide the others.
            continue
        # A JSON-LD payload is either a single object or a list of objects.
        entries = meta if isinstance(meta, list) else [meta]
        for entry in entries:
            if isinstance(entry, dict) and entry.get('datePublished'):
                return entry['datePublished']
    return 'N/A'
Beispiel #3
0
def author(soup):
    """Return the ``author`` value from the page's JSON-LD metadata.

    Looks at the first four ``<script type="application/ld+json">`` elements
    selected from *soup* (the same limit the original index-based scan used)
    and returns the first non-empty ``author`` value found.

    Args:
        soup: Object exposing ``select(css_selector)`` whose results carry a
            ``.text`` attribute (presumably a BeautifulSoup document --
            confirm against the caller).

    Returns:
        The ``author`` value exactly as stored in the JSON-LD payload -- it
        is returned unmodified, so it may be a string, an object (dict) or a
        list depending on the page -- or the string ``'N/A'`` when no
        inspected script provides one.
    """
    max_scripts = 4
    scripts = soup.select('script[type="application/ld+json"]')
    for script in scripts[:max_scripts]:
        try:
            meta = json.loads(script.text)
        except ValueError:
            # Malformed JSON in one script must not hide the others.
            continue
        # A JSON-LD payload is either a single object or a list of objects.
        entries = meta if isinstance(meta, list) else [meta]
        for entry in entries:
            if isinstance(entry, dict) and entry.get('author'):
                return entry['author']
    return 'N/A'