def set_user_from_climate_feedback_user_scrap(user, path, store=None):
    """Populate *user* from their Climate Feedback profile page.

    Fetches ``https://climatefeedback.org{path}``, scrapes the profile
    markup, and sets name, title, affiliation, thumbnail, expertise,
    ORCID id, website URL, and validated peer publications on *user*.

    Args:
        user: mutable user entity whose attributes are set in place.
        path: URL path of the reviewer profile on climatefeedback.org.
        store: optional scratch dict; kept for interface compatibility
            (currently unused by this function).

    Returns:
        None. *user* is mutated, and a ``UserArticle`` link is created
        for each publication listed on the page as a side effect.
    """
    if store is None:
        store = {}

    result = requests.get('https://climatefeedback.org{}'.format(path))
    soup = BeautifulSoup(result.text, 'html.parser')
    info = soup.find("div", class_="med-body")

    user.external_thumb_url = soup.find("img", class_="avatar")['src']

    # Split the display name: first token is the first name, the rest
    # form the last name.
    name = info.find("h2", class_="noborder").text
    first_name = None
    last_name = name
    if ' ' in name:
        name_chunks = name.split(' ')
        first_name = name_chunks[0]
        # BUG FIX: was `name_chunks[1:]` (a list); join into a string so
        # multi-part surnames ("van der Berg") are stored correctly.
        last_name = ' '.join(name_chunks[1:])
    user.firstName = first_name
    user.lastName = last_name

    paragraphs = info.find_all("p")

    # BUG FIX: situation_line was read (for title/affiliation) before it
    # was assigned — NameError. Assign first, then derive the fields.
    # NOTE(review): assumes "Title, Affiliation" comma format — an
    # affiliation-less line would raise IndexError; confirm against pages.
    situation_line = paragraphs[0].text
    user.title = situation_line.split(",")[0]
    user.affiliation = situation_line.split(",")[1]

    expertise_line = paragraphs[1].text
    if 'Expertise:' in expertise_line:
        expertise = expertise_line.split('Expertise: ')[1]
    else:
        expertise = None
    user.expertise = expertise

    # ORCID link is optional on the profile page.
    orcid = info.find("a", href=re.compile("https://orcid.org/(.*)"))
    if orcid:
        user.orcidId = orcid['href'].split('https://orcid.org/')[1]

    website = info.find("a", text="Website")
    if website:
        user.websiteUrl = website['href']

    # Publications: each anchor next to the "publication" image is a
    # validated peer publication; reuse an existing record when the URL
    # is already known, otherwise create and resolve a new Article.
    publication_image = info.find("img", alt="publication")
    if publication_image:
        publication_anchors = publication_image.parent.find_all("a")
        for publication_anchor in publication_anchors:
            publication_dict = {
                "tags": "isValidatedAsPeerPublication",
                "url": publication_anchor['href']
            }
            # BUG FIX: `data` was undefined here; filter on the URL from
            # the dict just built.
            publication = Publication.query \
                                     .filter_by(url=publication_dict['url']) \
                                     .first()
            if not publication:
                publication = Article(**publication_dict)
                publication.populate_from_dict(
                    resolve_with_url(publication.url))
            UserArticle(article=publication, user=user)