def category(soup):
    """Return the schema.org ``@type`` declared in the page's JSON-LD metadata.

    Inspects the first four ``<script type="application/ld+json">`` elements
    of *soup*, in document order, and returns the ``@type`` of the first one
    that declares a non-empty value. A block whose top level is a JSON array
    is represented by its first element.

    Args:
        soup: Parsed document exposing ``select(css_selector)`` whose results
            expose a ``.text`` attribute (e.g. a BeautifulSoup object).

    Returns:
        The ``@type`` value exactly as it appears in the JSON, or ``'N/A'``
        when none of the inspected blocks provides one.
    """
    max_blocks = 4  # same bound as the original loop, which stopped at index 3
    scripts = soup.select('script[type="application/ld+json"]')
    for script in scripts[:max_blocks]:
        try:
            meta = json.loads(script.text)
        except (TypeError, ValueError):
            # Malformed JSON-LD should not abort the scan; try the next block.
            continue
        if isinstance(meta, list):
            meta = meta[0] if meta else None
        if not isinstance(meta, dict):
            continue
        # Exact key lookup: the previous substring test (`key in '@type'`)
        # also matched unrelated keys such as 'type' or '@'.
        value = meta.get('@type')
        if value is not None and value != '':
            return value
    return 'N/A'
def date(soup):
    """Return the ``datePublished`` value from the page's JSON-LD metadata.

    Inspects the first four ``<script type="application/ld+json">`` elements
    of *soup*, in document order, and returns the ``datePublished`` of the
    first one that declares a non-empty value. A block whose top level is a
    JSON array is represented by its first element.

    Args:
        soup: Parsed document exposing ``select(css_selector)`` whose results
            expose a ``.text`` attribute (e.g. a BeautifulSoup object).

    Returns:
        The ``datePublished`` value exactly as it appears in the JSON (not
        parsed into a date object), or ``'N/A'`` when none of the inspected
        blocks provides one.
    """
    max_blocks = 4  # same bound as the original loop, which stopped at index 3
    scripts = soup.select('script[type="application/ld+json"]')
    for script in scripts[:max_blocks]:
        try:
            meta = json.loads(script.text)
        except (TypeError, ValueError):
            # Malformed JSON-LD should not abort the scan; try the next block.
            continue
        if isinstance(meta, list):
            meta = meta[0] if meta else None
        if not isinstance(meta, dict):
            continue
        # Exact key lookup; a substring test against 'datePublished' would
        # also match keys such as 'date'.
        published = meta.get('datePublished')
        if published is not None and published != '':
            return published
    return 'N/A'
def author(soup):
    """Return the ``author`` entry from the page's JSON-LD metadata.

    Inspects the first four ``<script type="application/ld+json">`` elements
    of *soup*, in document order, and returns the ``author`` of the first one
    that declares a non-empty value. A block whose top level is a JSON array
    is represented by its first element.

    Args:
        soup: Parsed document exposing ``select(css_selector)`` whose results
            expose a ``.text`` attribute (e.g. a BeautifulSoup object).

    Returns:
        The ``author`` value exactly as it appears in the JSON (it is not
        reduced to a name, so it may be a string, an object or a list), or
        ``'N/A'`` when none of the inspected blocks provides one.
    """
    max_blocks = 4  # same bound as the original loop, which stopped at index 3
    scripts = soup.select('script[type="application/ld+json"]')
    for script in scripts[:max_blocks]:
        try:
            meta = json.loads(script.text)
        except (TypeError, ValueError):
            # Malformed JSON-LD should not abort the scan; try the next block.
            continue
        if isinstance(meta, list):
            meta = meta[0] if meta else None
        if not isinstance(meta, dict):
            continue
        found = meta.get('author')
        if found is not None and found != '':
            # Return the extracted value; the earlier code returned the
            # `author` function object itself.
            return found
    return 'N/A'