def content_from_url(url, sync_crowdtangle=False, **kwargs):
    """Return the Content record for *url*, enriched from external sources.

    The record is obtained through ``Content.create_or_modify`` keyed on
    ``url``. Enrichment sources are tried in order and the first one that
    yields data wins:

    1. ``buzzsumo_trending_from_url(url, **kwargs)``
    2. ``newspaper_from_url(url, **kwargs)`` (only attempted for a truthy url)

    Args:
        url: address of the content to look up.
        sync_crowdtangle: when truthy, first call
            ``attach_crowdtangle_entities_from_content`` on the record with a
            fixed request start date of ``'2019-09-01'``.
        **kwargs: forwarded unchanged to both scraping helpers.

    Returns:
        The result of ``content.modify(data)`` when a source produced data;
        otherwise the content itself with ``urlNotFound`` set to ``True``.
    """
    content = Content.create_or_modify({'__SEARCH_BY__': 'url', 'url': url})

    if sync_crowdtangle:
        attach_crowdtangle_entities_from_content(
            content, request_start_date='2019-09-01')

    # First choice: Buzzsumo trending data.
    buzzsumo_data = buzzsumo_trending_from_url(url, **kwargs)
    if buzzsumo_data:
        return content.modify(buzzsumo_data)

    # Second choice: newspaper scraping, skipped entirely for a falsy url.
    newspaper_data = newspaper_from_url(url, **kwargs) if url else None
    if newspaper_data:
        return content.modify(newspaper_data)

    # Neither source knew the url: flag the record and hand it back as is.
    content.urlNotFound = True
    return content
def attach_crowdtangle_entities_from_content(content, request_start_date):
    """Record the Facebook shares of *content* as appearances.

    For every share returned by
    ``shares_from_url(content.url, request_start_date)`` this creates or
    updates (through the models' ``create_or_modify``):

    * a ``Medium`` for the sharing account, attached to the Facebook platform,
    * a ``Content`` of type ``ContentType.POST`` for the share itself,
    * an ``Appearance`` whose quoted content is *content* and whose quoting
      content is the post, testified by a dedicated "Crowd Tangle" user.

    Args:
        content: the quoted content; its ``url`` and ``id`` are read.
        request_start_date: passed through to ``shares_from_url``.

    Returns:
        None. The function is called for its ``create_or_modify`` calls.
    """
    # A "CrowdTangle" user testifies that these Facebook posts are connected
    # to the url.
    testifier = User.create_or_modify({
        '__SEARCH_BY__': 'email',
        'email': "*****@*****.**",
        'password': "******",
        'firstName': "Crowd",
        'lastName': "Tangle"
    })

    # The Facebook platform, so that the posts' media can be linked to it.
    facebook = Platform.create_or_modify({
        '__SEARCH_BY__': 'name',
        'name': 'Facebook'
    })

    for share in shares_from_url(content.url, request_start_date):
        # Keys coming from the share are unpacked last, so they take
        # precedence over the literal keys listed before them.
        account_medium = Medium.create_or_modify({
            '__SEARCH_BY__': 'name',
            'platform': facebook,
            **share['account']
        })

        post = Content.create_or_modify({
            '__SEARCH_BY__': 'url',
            'medium': account_medium,
            'type': ContentType.POST,
            **share['post']
        })

        # Identifier built from quoted content id, post identifier and
        # testifier id, re-read on each iteration.
        appearance_key = '{}_{}_{}'.format(
            content.id,
            post.crowdtangleIdentifier,
            testifier.id)

        Appearance.create_or_modify({
            '__SEARCH_BY__': 'crowdtangleIdentifier',
            'crowdtangleIdentifier': appearance_key,
            'quotedContent': content,
            'quotingContent': post,
            'testifier': testifier
        })
def appearance_from_row(row, unused_index=None):
    """Create or update the Appearance described by one row.

    The row is read as a mapping with the keys ``'Item reviewed'``,
    ``'url'``, ``'Outlet'``, ``'Authors'``, ``'Verified by'`` and
    ``'airtableId'``. The quoting content is looked up by the stripped
    ``row['url']``; the quoted item is the Claim, or failing that the
    Content, whose ``scienceFeedbackIdentifier`` equals the first reviewed
    item.

    Args:
        row: mapping holding the fields listed above.
        unused_index: accepted for call compatibility; never read.

    Returns:
        The value of ``Appearance.create_or_modify(...)``, or ``None`` when
        the row has no reviewed item, when the reviewed item matches neither
        a Claim nor a Content, when no verifier is listed, or when the
        verifier matches no User.

    Raises:
        KeyError: if ``row`` has no ``'url'`` (or, later, no
            ``'airtableId'``) entry.
    """
    reviewed_items = row.get('Item reviewed')
    if not reviewed_items:
        return None

    quoting_content = Content.create_or_modify({
        '__SEARCH_BY__': 'url',
        'url': row['url'].strip()
    })

    medium_science_feedback_ids = row.get('Outlet')
    if medium_science_feedback_ids:
        medium = Medium.query.filter_by(
            scienceFeedbackIdentifier=medium_science_feedback_ids[0]).first()
        # `.first()` gives None when nothing matches; leave the medium unset
        # in that case instead of failing on `None.id`.
        if medium is not None:
            quoting_content.mediumId = medium.id

    for author_science_feedback_id in row.get('Authors') or []:
        author = User.query.filter_by(
            scienceFeedbackIdentifier=author_science_feedback_id).first()
        if author is None:
            # Unknown author: skip this link, keep processing the others.
            continue
        author_content = AuthorContent.create_or_modify({
            '__SEARCH_BY__': ['authorId', 'contentId'],
            'authorId': humanize(author.id),
            'contentId': humanize(quoting_content.id)
        })
        # Rebind to a new list rather than appending in place, exactly as
        # the original code did.
        quoting_content.authorContents = \
            quoting_content.authorContents + [author_content]

    # The reviewed item is a Claim first, a Content as fallback.
    quoted_claim = Claim.query.filter_by(
        scienceFeedbackIdentifier=reviewed_items[0]).first()
    quoted_content = None
    if not quoted_claim:
        quoted_content = Content.query.filter_by(
            scienceFeedbackIdentifier=reviewed_items[0]).first()
    if not quoted_claim and not quoted_content:
        return None

    testifier_science_feedback_ids = row.get('Verified by')
    if not testifier_science_feedback_ids:
        return None
    testifier = User.query.filter_by(
        scienceFeedbackIdentifier=testifier_science_feedback_ids[0]).first()
    if not testifier:
        return None

    if IS_DEVELOPMENT:
        # Development-only placeholders: a static logo as thumbnail and a
        # title made of the last two path segments of the url, dashes
        # turned into spaces.
        quoting_content.externalThumbUrl = API_URL + '/static/logo.png'
        url_without_scheme = quoting_content.url \
            .replace('http://', '') \
            .replace('https://', '')
        quoting_content.title = "/".join(
            url_without_scheme.split('/')[-2:]).replace('-', ' ')

    appearance_dict = {
        '__SEARCH_BY__': 'scienceFeedbackIdentifier',
        'quotedClaim': quoted_claim,
        'quotedContent': quoted_content,
        'quotingContent': quoting_content,
        'scienceFeedbackIdentifier': row['airtableId'],
        'testifier': testifier
    }
    return Appearance.create_or_modify(appearance_dict)