def test_applies_parse_links(self):
    """process_external_links calls parse_links with each of 'a'..'i'."""
    expected_calls = [
        call(value)
        for value in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i')
    ]
    with patch('sheerlike.external_links.parse_links') as parse_links:
        process_external_links(self.doc)
        # Order is irrelevant; only the presence of each call is checked.
        parse_links.assert_has_calls(expected_calls, any_order=True)
def test_applies_convert_http_image_links(self):
    """convert_http_image_links is called with each value and the mappings."""
    url_mappings = [
        ('http://foo.bucket/', 'https://s3.amazonaws.com/foo.bucket/'),
    ]
    expected_calls = []
    for value in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'):
        expected_calls.append(call(value, url_mappings))

    target = 'sheerlike.external_links.convert_http_image_links'
    with patch(target, return_value='html') as convert:
        process_external_links(self.doc)
        # Order is irrelevant; only the presence of each call is checked.
        convert.assert_has_calls(expected_calls, any_order=True)
def process_orgmember(member):
    """
    Turn an org member record into an 'orgmember' index document.

    ``member`` is modified in place: ``comments`` and ``custom_fields`` are
    removed and ``_id``, ``has_parent``, ``name`` and ``titles`` are set.
    ``category`` is set only when ``taxonomy_orgmember_cat`` is non-empty.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).

    Raises KeyError when ``comments``, ``slug``, ``parent``,
    ``taxonomy_orgmember_cat`` or ``custom_fields`` is missing.
    """
    del member['comments']
    member['_id'] = member['slug']
    member['has_parent'] = member['parent'] != 0

    categories = member['taxonomy_orgmember_cat']
    if categories:
        # The previous replace('&', '&') could never change the title.
        # Presumably titles carry the HTML entity '&amp;' -- TODO confirm.
        member['category'] = categories[0]['title'].replace('&amp;', '&')

    custom_fields = member['custom_fields']
    member['name'] = custom_fields.get('name') or ''

    # Prefer the combined 'titles' field; otherwise gather the numbered
    # fields, skipping any that are missing or empty.
    member['titles'] = custom_fields.get('titles') or [
        custom_fields.get(key)
        for key in ('titles_0', 'titles_1')
        if custom_fields.get(key)
    ]

    del member['custom_fields']
    member = process_external_links(member)
    return {'_type': 'orgmember', '_id': member['slug'], '_source': member}
def process_post(post):
    """
    Turn a post into a 'featured_topic' index document.

    ``post`` is modified in place: ``author`` becomes a one-element list
    holding the author's name, selected custom fields are copied to the top
    level, ``links`` is built from ``links_0``..``links_9`` when no
    ``links`` custom field exists, and ``custom_fields`` is removed.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    post['_id'] = post['slug']
    post['author'] = [post['author']['name']]

    custom_fields = post['custom_fields']
    copied_fields = (
        'og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
        'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term', 'utm_content',
        'links',
    )
    for field in copied_fields:
        if field in custom_fields:
            post[field] = custom_fields[field]

    if 'links' not in custom_fields:
        collected = []
        for index in range(10):
            key = 'links_%s' % index
            if key not in custom_fields:
                continue
            value = custom_fields[key]
            if isinstance(value, basestring):
                # A bare string is just the URL, with no label.
                collected.append({'url': value})
            else:
                collected.append({'url': value[0], 'label': value[1]})
        post['links'] = collected

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'featured_topic', '_id': post['slug'], '_source': post}
def process_history(item):
    """
    Turn a history record into a 'history' index document.

    ``item`` is modified in place: ``comments`` and ``custom_fields`` are
    removed, ``has_parent`` is set, and the date fields relevant to the
    record kind are copied out of ``custom_fields`` when they are truthy.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    del item['comments']
    item['_id'] = item['slug']

    if item['parent'] != 0:
        # This is an individual history point
        item['has_parent'] = True
        date_keys = ('item_date',)
    else:
        # This is history section
        item['has_parent'] = False
        date_keys = ('section_date_from', 'section_date_to')

    custom_fields = item['custom_fields']
    for key in date_keys:
        if custom_fields.get(key):
            item[key] = custom_fields[key]

    del item['custom_fields']
    item = process_external_links(item)
    return {'_type': 'history', '_id': item['slug'], '_source': item}
def process_post(post):
    """
    Turn a post into a 'newsroom' index document.

    ``post`` is modified in place: ``comments`` and ``custom_fields`` are
    removed; ``category``, ``author`` and ``tags`` are built from the
    taxonomy lists; selected custom fields are copied to the top level.
    ``author`` is left empty when 'Press Release' is one of the categories.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    del post['comments']
    post['_id'] = post['slug']

    # The previous replace('&', '&') could never change the title.
    # Presumably titles carry the HTML entity '&amp;' -- TODO confirm.
    post['category'] = [
        cat['title'].replace('&amp;', '&')
        for cat in post['taxonomy_cfpb_newsroom_cat_taxonomy']
    ]
    post['author'] = [
        author['title']
        for author in post['taxonomy_fj_author']
        if 'Press Release' not in post['category']
    ]
    post['tags'] = [tag['title'] for tag in post['taxonomy_fj_tag']]

    # Keep only letters, digits, whitespace and hyphens. The old loop
    # restarted from the untouched string for every offending character,
    # so only the last one was actually removed.
    for name in ('author', 'tags'):
        post[name] = [
            ''.join(ch for ch in text
                    if ch.isalnum() or ch.isspace() or ch == '-')
            for text in post[name]
        ]

    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'dsq_thread_id', 'alt_title']
    custom_fields = post['custom_fields']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'newsroom', '_id': post['slug'], '_source': post}
def process_orgmember(member):
    """
    Turn an org member record into an 'orgmember' index document.

    ``member`` is modified in place: ``comments`` and ``custom_fields`` are
    removed and ``_id``, ``has_parent``, ``name`` and ``titles`` are set.
    ``category`` is set only when ``taxonomy_orgmember_cat`` is non-empty.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).

    Raises KeyError when ``comments``, ``slug``, ``parent``,
    ``taxonomy_orgmember_cat`` or ``custom_fields`` is missing.
    """
    del member['comments']
    member['_id'] = member['slug']
    member['has_parent'] = member['parent'] != 0

    categories = member['taxonomy_orgmember_cat']
    if categories:
        # The previous replace('&', '&') could never change the title.
        # Presumably titles carry the HTML entity '&amp;' -- TODO confirm.
        member['category'] = categories[0]['title'].replace('&amp;', '&')

    custom_fields = member['custom_fields']
    member['name'] = custom_fields.get('name') or ''

    # Prefer the combined 'titles' field; otherwise gather the numbered
    # fields, skipping any that are missing or empty.
    member['titles'] = custom_fields.get('titles') or [
        custom_fields.get(key)
        for key in ('titles_0', 'titles_1')
        if custom_fields.get(key)
    ]

    del member['custom_fields']
    member = process_external_links(member)
    return {'_type': 'orgmember', '_id': member['slug'], '_source': member}
def process_post(post):
    """
    Turn a post into a 'faq' index document.

    ``post`` is modified in place: selected custom fields are copied to the
    top level, ``tags`` is built from ``taxonomy_fj_tag`` when that key is
    present, and ``custom_fields`` is removed.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    post['_id'] = post['slug']

    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'faq']
    custom_fields = post['custom_fields']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'taxonomy_fj_tag' in post:
        # Keep only letters, digits, whitespace and hyphens. The old loop
        # restarted from the untouched title for every offending character,
        # so only the last one was actually removed.
        post['tags'] = [
            ''.join(ch for ch in tag['title']
                    if ch.isalnum() or ch.isspace() or ch == '-')
            for tag in post['taxonomy_fj_tag']
        ]

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'faq', '_id': post['slug'], '_source': post}
def process_post(post):
    """
    Turn a post into a 'featured_topic' index document.

    ``post`` is modified in place: ``author`` becomes a one-element list
    holding the author's name, selected custom fields are copied to the top
    level, ``links`` is built from ``links_0``..``links_9`` when no
    ``links`` custom field exists, and ``custom_fields`` is removed.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    post['_id'] = post['slug']
    post['author'] = [post['author']['name']]

    custom_fields = post['custom_fields']
    copied_fields = (
        'og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
        'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term', 'utm_content',
        'links',
    )
    for field in copied_fields:
        if field in custom_fields:
            post[field] = custom_fields[field]

    if 'links' not in custom_fields:
        collected = []
        for index in range(10):
            key = 'links_%s' % index
            if key not in custom_fields:
                continue
            value = custom_fields[key]
            if isinstance(value, basestring):
                # A bare string is just the URL, with no label.
                collected.append({'url': value})
            else:
                collected.append({'url': value[0], 'label': value[1]})
        post['links'] = collected

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'featured_topic', '_id': post['slug'], '_source': post}
def process_event(event):
    """
    Turn an event record into a 'calendar_event' index document.

    ``event`` is modified in place: ``dtstart`` and ``dtend`` are parsed,
    converted to the 'America/New_York' zone and stored back as ISO strings;
    ``date`` mirrors ``dtstart``; ``day`` is the start date; ``description``
    is stripped, or removed when it contains only whitespace.

    Returns a dict with ``_type``, ``_id`` (the event id) and ``_source``
    (the value returned by ``process_external_links``).
    """
    event['_id'] = event['id']

    parsed_start = dateutil.parser.parse(event['dtstart'])
    # TODO: The times passed in are correct & in ET,
    # but we want to treat them like we're treating the incorrect times
    # in our db so that the fix for that is consistent. We can fix this
    # when Wagtail fixes https://github.com/torchbox/wagtail/issues/2406
    # along with https://github.com/cfpb/cfgov-refresh/pull/1661
    start = parsed_start.astimezone(timezone('America/New_York'))
    event['date'] = start.isoformat()
    event['dtstart'] = event['date']

    parsed_end = dateutil.parser.parse(event['dtend'])
    end = parsed_end.astimezone(timezone('America/New_York'))
    event['dtend'] = end.isoformat()

    event['day'] = datetime.date(start.year, start.month, start.day)

    description = event['description']
    if description:
        stripped = description.strip()
        if stripped == '':
            del event['description']
        else:
            event['description'] = stripped

    event = process_external_links(event)
    return {'_type': 'calendar_event', '_id': event['id'], '_source': event}
def process_contact(contact):
    """
    Turn a contact record into a 'contact' index document.

    ``contact`` is modified in place: ``comments`` and ``custom_fields`` are
    removed; ``email``, ``phone`` and ``fax`` are taken from the custom
    field of the same name, or otherwise assembled from the flattened
    ``<name>_<n>_addr`` / ``<name>_<n>_num`` / ``<name>_<n>_desc`` and
    ``fax_num`` / ``fax_desc`` fields; address fields are copied across; and
    ``web`` is normalised to a dict.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    del contact['comments']
    contact['_id'] = contact['slug']
    custom_fields = contact['custom_fields']

    for name in ('email', 'phone', 'fax'):
        if name in custom_fields:
            contact[name] = custom_fields[name]
        elif name == 'fax':  # was `is 'fax'`, an identity comparison
            contact[name] = {}
            if 'fax_num' in custom_fields:
                contact[name]['num'] = custom_fields['fax_num']
            if 'fax_desc' in custom_fields:
                contact[name]['desc'] = custom_fields['fax_desc']
            if not contact[name]:
                del contact[name]
        else:
            # Build each numbered entry separately and keep only the
            # non-empty ones. The old code appended, indexed by the slot
            # number and popped empties, which raised IndexError as soon as
            # an earlier slot had been popped.
            entries = []
            for i in range(3):
                entry = {}
                addr_key = '%s_%s_addr' % (name, i)
                num_key = '%s_%s_num' % (name, i)
                desc_key = '%s_%s_desc' % (name, i)
                if addr_key in custom_fields:
                    entry['addr'] = custom_fields[addr_key]
                elif num_key in custom_fields:
                    entry['num'] = custom_fields[num_key]
                if desc_key in custom_fields:
                    entry['desc'] = custom_fields[desc_key]
                if entry:
                    entries.append(entry)
            contact[name] = entries

    names = [
        'sitewide_desc', 'attn', 'street', 'city', 'state', 'zip_code',
        'addr_desc'
    ]
    for name in names:
        if name in custom_fields:
            contact[name] = custom_fields[name]

    # `'web' and 'web_0' in custom_fields` only ever tested 'web_0', which
    # made the fallback branch below unreachable. Test both keys explicitly.
    if 'web' in custom_fields and 'web_0' in custom_fields:
        del custom_fields['web_0']
    elif 'web_0' in custom_fields:
        custom_fields['web'] = custom_fields['web_0']

    if 'web' in custom_fields:
        web = custom_fields['web']
        if 'url' not in web or 'label' not in web:
            # No url/label pair: wrap the raw value as the URL.
            contact['web'] = {'url': web}
        else:
            contact['web'] = web

    del contact['custom_fields']
    contact = process_external_links(contact)
    return {'_type': 'contact', '_id': contact['slug'], '_source': contact}
def process_view(post):
    """
    Turn a view post into a 'views' index document.

    ``post`` is modified in place: ``popular_posts`` (at most five when
    given as a list) and ``related_links`` are normalised to lists,
    selected custom fields are copied to the top level, and
    ``custom_fields`` is removed. When a ``related_hero`` id is present the
    hero is fetched over HTTP and, on an 'ok' response, stored as ``hero``.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    # limit popular posts to five items
    if 'popular_posts' in custom_fields:
        popular = custom_fields['popular_posts']
        if isinstance(popular, basestring):
            post['popular_posts'] = [popular]
        else:
            post['popular_posts'] = list(popular[:5])

    # convert related links into a proper list
    if 'related_links' in custom_fields:
        post['related_links'] = custom_fields['related_links']
    else:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key not in custom_fields:
                continue
            value = custom_fields[key]
            if isinstance(value, basestring):
                related.append({'url': value})
            else:
                related.append({'url': value[0], 'label': value[1]})
        post['related_links'] = related

    # append the hero information
    if 'related_hero' in custom_fields:
        hero_ref = custom_fields['related_hero']
        if isinstance(hero_ref, basestring):
            hero_id = hero_ref
        else:
            hero_id = hero_ref[0]
        if hero_id:
            hero_url = os.path.expandvars(
                '$WORDPRESS/hero/' + hero_id + '/?json=1')
            response = requests.get(hero_url)
            hero_data = json.loads(response.content)
            # Compare by value: `is 'ok'` tested object identity, which a
            # freshly decoded JSON string is not guaranteed to satisfy.
            if hero_data['status'] == 'ok':
                hero_data = hero_data['post']
                if 'related_post' in hero_data['custom_fields']:
                    hero_data['related_posts'] = [
                        p for p in hero_data['custom_fields']['related_post']
                        if p
                    ]
                # NOTE(review): the hero is attached only for 'ok'
                # responses -- confirm this matches the intended nesting.
                post['hero'] = hero_data

    # convert other custom fields
    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'alt_title']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'views', '_id': post['slug'], '_source': post}
def process_post(page):
    """
    Turn a page into a 'report' index document.

    Removes ``comments`` from ``page``, copies ``id`` into ``_id`` and
    passes the page through ``process_external_links``.

    Returns a dict with ``_type``, ``_id`` (the id) and ``_source``.
    """
    del page['comments']
    page['_id'] = page['id']

    document = process_external_links(page)
    return {
        '_type': 'report',
        '_id': document['id'],
        '_source': document,
    }
def test_applies_convert_http_image_links(self):
    """convert_http_image_links is called with each value and the mappings."""
    url_mappings = [
        ('http://foo.bucket/', 'https://s3.amazonaws.com/foo.bucket/'),
    ]
    expected_calls = []
    for value in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'):
        expected_calls.append(call(value, url_mappings))

    target = 'sheerlike.external_links.convert_http_image_links'
    with patch(target, return_value='html') as convert:
        process_external_links(self.doc)
        # Order is irrelevant; only the presence of each call is checked.
        convert.assert_has_calls(expected_calls, any_order=True)
def process_post(page):
    """
    Turn a page into a 'pages' index document.

    Removes ``comments`` from ``page``, copies ``id`` into ``_id`` and
    passes the page through ``process_external_links``.

    Returns a dict with ``_type``, ``_id`` (the id) and ``_source``.
    """
    del page['comments']
    page['_id'] = page['id']

    document = process_external_links(page)
    return {
        '_type': 'pages',
        '_id': document['id'],
        '_source': document,
    }
def process_sub_page(post):
    """
    Turn a sub page into a 'sub_page' index document.

    ``post`` is modified in place: ``comments`` and ``custom_fields`` are
    removed; selected custom fields are copied to the top level;
    ``related_faq`` is reduced to a single value; ``related_links`` is
    built from ``related_links_0``..``related_links_4`` when not already
    set; ``tags`` is built from ``taxonomy_fj_tag`` when present; and
    ``has_parent`` reflects whether ``parent`` is non-zero.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    del post['comments']
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    names = [
        'og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
        'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term', 'utm_content',
        'show_in_office', 'use_filtered_feed', 'use_form', 'body_content',
        'related_links', 'short_title', 'preview_text'
    ]
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'related_faq' in custom_fields:
        related_faq = custom_fields['related_faq']
        if isinstance(related_faq, basestring):
            post['related_faq'] = related_faq
        else:
            post['related_faq'] = related_faq[0]

    if 'related_links' not in post:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key in custom_fields:
                related.append({
                    'url': custom_fields[key][0],
                    'label': custom_fields[key][1]
                })
        if related:
            post['related_links'] = related

    del post['custom_fields']

    if 'taxonomy_fj_tag' in post:
        # Keep only letters, digits, whitespace and hyphens. The old loop
        # restarted from the untouched title for every offending character,
        # so only the last one was actually removed.
        post['tags'] = [
            ''.join(ch for ch in tag['title']
                    if ch.isalnum() or ch.isspace() or ch == '-')
            for tag in post['taxonomy_fj_tag']
        ]

    post['has_parent'] = post['parent'] != 0

    post = process_external_links(post)
    return {'_type': 'sub_page', '_id': post['slug'], '_source': post}
def process_career(career):
    """
    Turn a career record into a 'career' index document.

    ``career`` is modified in place: truthy ``salary_max`` / ``salary_min``
    values are converted to float, and within each applicant type's
    ``application_type`` the ``applicant_type`` key is renamed to ``name``.

    Returns a dict with ``_type``, ``_id`` (the id) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    career['_id'] = career['id']

    # Decide which salary fields need converting before touching any of
    # them, then convert.
    salary_keys = []
    for bound in ('max', 'min'):
        key = 'salary_' + bound
        if career[key]:
            salary_keys.append(key)
    for key in salary_keys:
        career[key] = float(career[key])

    if 'applicant_types' in career:
        for entry in career['applicant_types']:
            if 'application_type' not in entry:
                continue
            application = entry['application_type']
            application['name'] = application['applicant_type']
            del application['applicant_type']

    career = process_external_links(career)
    return {'_type': 'career', '_id': career['id'], '_source': career}
def process_career(career):
    """
    Turn a career record into a 'career' index document.

    ``career`` is modified in place: truthy ``salary_max`` / ``salary_min``
    values are converted to float, and within each applicant type's
    ``application_type`` the ``applicant_type`` key is renamed to ``name``.

    Returns a dict with ``_type``, ``_id`` (the id) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    career['_id'] = career['id']

    # Decide which salary fields need converting before touching any of
    # them, then convert.
    salary_keys = []
    for bound in ('max', 'min'):
        key = 'salary_' + bound
        if career[key]:
            salary_keys.append(key)
    for key in salary_keys:
        career[key] = float(career[key])

    if 'applicant_types' in career:
        for entry in career['applicant_types']:
            if 'application_type' not in entry:
                continue
            application = entry['application_type']
            application['name'] = application['applicant_type']
            del application['applicant_type']

    career = process_external_links(career)
    return {'_type': 'career', '_id': career['id'], '_source': career}
def process_post(post):
    """
    Turn a blog post into a 'posts' index document.

    ``post`` is modified in place: ``comments`` and ``custom_fields`` are
    removed; ``blog_category``, ``author`` and ``tags`` are built from the
    taxonomy lists; ``category`` is set to ``['Blog']``; selected custom
    fields are copied to the top level; ``related_hero`` is reduced to a
    single value; and ``related_links`` is built from
    ``related_links_0``..``related_links_4`` when not already set.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    del post['comments']
    post['_id'] = post['slug']

    # The previous replace('&', '&') could never change the title.
    # Presumably titles carry the HTML entity '&amp;' -- TODO confirm.
    post['blog_category'] = [
        cat['title'].replace('&amp;', '&')
        for cat in post['taxonomy_fj_category']
    ]
    post['category'] = ['Blog']
    post['author'] = [author['title']
                      for author in post['taxonomy_fj_author']]
    post['tags'] = [tag['title'] for tag in post['taxonomy_fj_tag']]

    # Keep only letters, digits, whitespace and hyphens. The old loop
    # restarted from the untouched string for every offending character,
    # so only the last one was actually removed.
    for name in ('author', 'tags'):
        post[name] = [
            ''.join(ch for ch in text
                    if ch.isalnum() or ch.isspace() or ch == '-')
            for text in post[name]
        ]

    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'alt_title', 'popular_posts',
             'show_featured_image_in_post', 'display_in_newsroom',
             'related_links', 'dsq_needs_sync', 'dsq_thread_id']
    custom_fields = post['custom_fields']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'related_hero' in custom_fields:
        related_hero = custom_fields['related_hero']
        if isinstance(related_hero, basestring):
            post['related_hero'] = related_hero
        else:
            post['related_hero'] = related_hero[0]

    if 'related_links' not in post:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key in custom_fields:
                related.append({'url': custom_fields[key][0],
                                'label': custom_fields[key][1]})
        post['related_links'] = related

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'posts', '_id': post['slug'], '_source': post}
def process_sub_page(post):
    """
    Turn a sub page into a 'sub_page' index document.

    ``post`` is modified in place: ``comments`` and ``custom_fields`` are
    removed; selected custom fields are copied to the top level;
    ``related_faq`` is reduced to a single value; ``related_links`` is
    built from ``related_links_0``..``related_links_4`` when not already
    set; ``tags`` is built from ``taxonomy_fj_tag`` when present; and
    ``has_parent`` reflects whether ``parent`` is non-zero.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    del post['comments']
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'show_in_office', 'use_filtered_feed',
             'use_form', 'body_content', 'related_links', 'short_title',
             'preview_text']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'related_faq' in custom_fields:
        related_faq = custom_fields['related_faq']
        if isinstance(related_faq, basestring):
            post['related_faq'] = related_faq
        else:
            post['related_faq'] = related_faq[0]

    if 'related_links' not in post:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key in custom_fields:
                related.append({'url': custom_fields[key][0],
                                'label': custom_fields[key][1]})
        if related:
            post['related_links'] = related

    del post['custom_fields']

    if 'taxonomy_fj_tag' in post:
        # Keep only letters, digits, whitespace and hyphens. The old loop
        # restarted from the untouched title for every offending character,
        # so only the last one was actually removed.
        post['tags'] = [
            ''.join(ch for ch in tag['title']
                    if ch.isalnum() or ch.isspace() or ch == '-')
            for tag in post['taxonomy_fj_tag']
        ]

    post['has_parent'] = post['parent'] != 0

    post = process_external_links(post)
    return {'_type': 'sub_page', '_id': post['slug'], '_source': post}
def test_converts_http_s3_link(self):
    """An http image URL on foo.bucket is rewritten to its https S3 URL."""
    doc = '<img src="http://foo.bucket/img.png"/>'
    expected = '<img src="https://s3.amazonaws.com/foo.bucket/img.png"/>'

    converted = process_external_links(doc)

    self.assertEqual(converted, expected)
def process_office(post):
    """
    Turn an office post into an 'office' index document.

    ``post`` is modified in place: ``intro`` and ``top_story`` dicts are
    assembled from prefixed custom fields (and only set when non-empty);
    ``resources`` is taken from the custom field of that name or built from
    ``resource_<n>_<field>`` fields; selected custom fields and single-value
    ``related_*`` fields are copied to the top level; ``tags`` is built from
    ``taxonomy_fj_tag``; and ``custom_fields`` is removed.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    # get intro text & subscribe form data from custom fields
    intro = {}
    for attr in ['intro_text', 'intro_subscribe_form',
                 'intro_govdelivery_code']:
        if attr in custom_fields:
            intro[attr.replace('intro_', '')] = custom_fields[attr]
    if intro:
        post['intro'] = intro

    # build top story dict
    top_story = {}
    for attr in ['top_story_head', 'top_story_desc']:
        if attr in custom_fields:
            top_story[attr.replace('top_story_', '')] = custom_fields[attr]

    # convert top story links into a proper list
    if 'top_story_links' in custom_fields:
        top_story['links'] = custom_fields['top_story_links']
    else:
        top_story_links = []
        for x in range(5):
            key = 'top_story_links_%s' % x
            if key in custom_fields:
                top_story_links.append({'url': custom_fields[key][0],
                                        'label': custom_fields[key][1]})
        if top_story_links:
            top_story['links'] = top_story_links

    if top_story:
        post['top_story'] = top_story

    # create list of office resource dicts
    if 'resources' in custom_fields:
        post['resources'] = custom_fields['resources']
    else:
        post['resources'] = []
        for x in range(4):
            resource = {}
            for field in ['title', 'desc', 'icon', 'link']:
                field_name = 'resource_%s_%s' % (str(x), field)
                if field_name in custom_fields and custom_fields[field_name]:
                    if field == 'link':
                        resource[field] = {
                            'url': custom_fields[field_name][0],
                            'label': custom_fields[field_name][1],
                        }
                    else:
                        resource[field] = custom_fields[field_name]
            if resource:
                post['resources'].append(resource)

    # add other custom fields
    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'short_title', 'related_sub_pages']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    for related in ['related_hero', 'related_contact', 'related_faq']:
        if related in custom_fields:
            if isinstance(custom_fields[related], basestring):
                post[related] = custom_fields[related]
            else:
                post[related] = custom_fields[related][0]

    # Keep only letters, digits, whitespace and hyphens. The old loop
    # restarted from the untouched title for every offending character, so
    # only the last one was actually removed.
    post['tags'] = [
        ''.join(ch for ch in tag['title']
                if ch.isalnum() or ch.isspace() or ch == '-')
        for tag in post['taxonomy_fj_tag']
    ]

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'office', '_id': post['slug'], '_source': post}
def process_office(post):
    """
    Turn an office post into an 'office' index document.

    ``post`` is modified in place: ``intro`` and ``top_story`` dicts are
    assembled from prefixed custom fields (and only set when non-empty);
    ``resources`` is taken from the custom field of that name or built from
    ``resource_<n>_<field>`` fields; selected custom fields and single-value
    ``related_*`` fields are copied to the top level; ``tags`` is built from
    ``taxonomy_fj_tag``; and ``custom_fields`` is removed.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source`` (the
    value returned by ``process_external_links``).
    """
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    # get intro text & subscribe form data from custom fields
    intro = {}
    for attr in [
            'intro_text', 'intro_subscribe_form', 'intro_govdelivery_code'
    ]:
        if attr in custom_fields:
            intro[attr.replace('intro_', '')] = custom_fields[attr]
    if intro:
        post['intro'] = intro

    # build top story dict
    top_story = {}
    for attr in ['top_story_head', 'top_story_desc']:
        if attr in custom_fields:
            top_story[attr.replace('top_story_', '')] = custom_fields[attr]

    # convert top story links into a proper list
    if 'top_story_links' in custom_fields:
        top_story['links'] = custom_fields['top_story_links']
    else:
        top_story_links = []
        for x in range(5):
            key = 'top_story_links_%s' % x
            if key in custom_fields:
                top_story_links.append({
                    'url': custom_fields[key][0],
                    'label': custom_fields[key][1]
                })
        if top_story_links:
            top_story['links'] = top_story_links

    if top_story:
        post['top_story'] = top_story

    # create list of office resource dicts
    if 'resources' in custom_fields:
        post['resources'] = custom_fields['resources']
    else:
        post['resources'] = []
        for x in range(4):
            resource = {}
            for field in ['title', 'desc', 'icon', 'link']:
                field_name = 'resource_%s_%s' % (str(x), field)
                if field_name in custom_fields and custom_fields[field_name]:
                    if field == 'link':
                        resource[field] = {
                            'url': custom_fields[field_name][0],
                            'label': custom_fields[field_name][1]
                        }
                    else:
                        resource[field] = custom_fields[field_name]
            if resource:
                post['resources'].append(resource)

    # add other custom fields
    names = [
        'og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
        'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term', 'utm_content',
        'short_title', 'related_sub_pages'
    ]
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    for related in ['related_hero', 'related_contact', 'related_faq']:
        if related in custom_fields:
            if isinstance(custom_fields[related], basestring):
                post[related] = custom_fields[related]
            else:
                post[related] = custom_fields[related][0]

    # Keep only letters, digits, whitespace and hyphens. The old loop
    # restarted from the untouched title for every offending character, so
    # only the last one was actually removed.
    post['tags'] = [
        ''.join(ch for ch in tag['title']
                if ch.isalnum() or ch.isspace() or ch == '-')
        for tag in post['taxonomy_fj_tag']
    ]

    del post['custom_fields']
    post = process_external_links(post)
    return {'_type': 'office', '_id': post['slug'], '_source': post}
def process_event(event):
    """
    Process an event as provided by the WordPress API and return JSON
    suitable for indexing in Elasticsearch.

    ``event`` is modified in place: ``comments``, ``custom_fields`` and
    every key starting with 'taxonomy' are removed; ``tags``,
    ``open_graph``, ``beginning_time``, ``ending_time`` and ``ics`` are
    built. The keys are then sorted into an OrderedDict before being passed
    to ``process_external_links``.

    Returns a dict with ``_type``, ``_id`` (the slug) and ``_source``.
    """
    del event['comments']
    event['_id'] = event['slug']
    custom_fields = event['custom_fields']

    # Reassign data out of custom fields
    # Keep only letters, digits, whitespace and hyphens in tag titles. The
    # old loop restarted from the untouched title for every offending
    # character, so only the last one was actually removed.
    event['tags'] = [
        ''.join(ch for ch in tag['title']
                if ch.isalnum() or ch.isspace() or ch == '-')
        for tag in event['taxonomy_fj_tag']
    ]

    event['open_graph'] = {}
    og_fields = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_rel',
                 'twtr_lang', 'twtr_hash', 'utm_campaign', 'utm_term',
                 'utm_content']
    event_fields = ['rsvp', 'agenda', 'venue', 'archive', 'live', 'future',
                    'live_stream']
    for field in og_fields:
        if field in custom_fields and custom_fields[field]:
            event['open_graph'][field] = custom_fields[field]
    for field in event_fields:
        if field in custom_fields and custom_fields[field]:
            event[field] = custom_fields[field]

    if 'taxonomy_beginning_time' in event and event['taxonomy_beginning_time']:
        beginning = event['taxonomy_beginning_time'][0]
        event['beginning_time'] = {
            'date': beginning['title'],
            'timezone': beginning['description'],
        }
    if 'taxonomy_ending_time' in event and event['taxonomy_ending_time']:
        ending = event['taxonomy_ending_time'][0]
        event['ending_time'] = {
            'date': ending['title'],
            'timezone': ending['description'],
        }

    # Create ICS data dictionary
    event['ics'] = {}
    ics = event['ics']
    if 'title' in event:
        ics['summary'] = event['title']
    if 'venue' in event:
        venue = event['venue']
        # `'city' and 'state' in venue` only tested 'state', so a venue
        # without a city raised KeyError below. Require both keys.
        if 'city' in venue and 'state' in venue:
            ics['location'] = "%s, %s" % (venue['city'], venue['state'])
    if 'beginning_time' in event:
        ics['dtstart'] = event['beginning_time']['date']
        ics['starting_tzinfo'] = event['beginning_time']['timezone']
    if 'ending_time' in event:
        ics['dtend'] = event['ending_time']['date']
        ics['ending_tzinfo'] = event['ending_time']['timezone']

    ics_dict = {'date': 'dtstamp', 'relative_url': 'uid'}
    for wp_field, ics_field in ics_dict.items():
        if wp_field in event and event[wp_field]:
            ics[ics_field] = event[wp_field]

    # Delete taxonomy data and custom fields
    del event['custom_fields']
    # Iterate over a snapshot of the keys so that deleting entries cannot
    # disturb the iteration.
    for key in list(event):
        if key.startswith('taxonomy'):
            del event[key]

    event = OrderedDict(sorted(event.items(), key=lambda k: k[0]))
    event = process_external_links(event)
    return {'_type': 'events', '_id': event['slug'], '_source': event}
def test_converts_http_s3_link(self):
    """An http image URL on foo.bucket is rewritten to its https S3 URL."""
    doc = '<img src="http://foo.bucket/img.png"/>'
    expected = '<img src="https://s3.amazonaws.com/foo.bucket/img.png"/>'

    converted = process_external_links(doc)

    self.assertEqual(converted, expected)