def test_applies_parse_links(self):
    """Processing ``self.doc`` must call ``parse_links`` with each of
    the values 'a' through 'i'; call order is not checked."""
    expected_calls = [call(value) for value in 'abcdefghi']
    with patch('sheerlike.external_links.parse_links') as mocked_parse:
        process_external_links(self.doc)
        mocked_parse.assert_has_calls(expected_calls, any_order=True)
 def test_applies_parse_links(self):
     """Processing ``self.doc`` must call ``parse_links`` with each of
     the values 'a' through 'i'; call order is not checked."""
     target = 'sheerlike.external_links.parse_links'
     wanted = [call(letter) for letter in 'abcdefghi']
     with patch(target) as parse_links_mock:
         process_external_links(self.doc)
         parse_links_mock.assert_has_calls(wanted, any_order=True)
    def test_applies_convert_http_image_links(self):
        """Processing ``self.doc`` must call ``convert_http_image_links``
        with each of 'a' through 'i' plus the expected URL mappings;
        call order is not checked."""
        bucket_mappings = [
            ('http://foo.bucket/', 'https://s3.amazonaws.com/foo.bucket/'),
        ]
        expected_calls = [
            call(value, bucket_mappings) for value in 'abcdefghi'
        ]
        target = 'sheerlike.external_links.convert_http_image_links'

        with patch(target, return_value='html') as mocked_convert:
            process_external_links(self.doc)
            mocked_convert.assert_has_calls(expected_calls, any_order=True)
Exemple #4
0
def process_orgmember(member):
    """
    Reshape an org-member record into an indexable document.

    The record is modified in place: ``comments`` and ``custom_fields``
    are removed, ``_id`` is copied from ``slug``, ``has_parent`` reflects
    whether ``parent`` is non-zero, and ``category``, ``name`` and
    ``titles`` are lifted out of the taxonomy / custom fields. The record
    is then passed through ``process_external_links``.

    Returns a dict with ``_type`` ('orgmember'), ``_id`` and ``_source``.
    Raises KeyError when a required key (``comments``, ``slug``,
    ``parent``, ``taxonomy_orgmember_cat``, ``custom_fields``) is absent.
    """
    del member['comments']
    member['_id'] = member['slug']
    member['has_parent'] = member['parent'] != 0

    categories = member['taxonomy_orgmember_cat']
    if categories:
        # Decode the HTML-escaped ampersand. The previous call replaced
        # '&' with '&', which could never change the title.
        member['category'] = categories[0]['title'].replace('&amp;', '&')

    custom_fields = member['custom_fields']
    member['name'] = custom_fields.get('name') or ''
    # Prefer the combined ``titles`` field; otherwise gather the
    # non-empty numbered fields.
    member['titles'] = custom_fields.get('titles') or [
        custom_fields.get(key)
        for key in ('titles_0', 'titles_1')
        if custom_fields.get(key)
    ]
    del member['custom_fields']

    member = process_external_links(member)

    return {'_type': 'orgmember', '_id': member['slug'], '_source': member}
def process_post(post):
    """
    Reshape a featured-topic post into an indexable document.

    Sets ``_id`` from ``slug``, replaces ``author`` with a one-element
    list holding the author's name, promotes selected custom fields to
    the top level, builds ``links`` from the numbered ``links_N`` fields
    when no ``links`` custom field exists, drops ``custom_fields`` and
    runs the result through ``process_external_links``.

    Returns a dict with ``_type`` ('featured_topic'), ``_id``, ``_source``.
    """
    post['_id'] = post['slug']
    post['author'] = [post['author']['name']]

    custom_fields = post['custom_fields']
    promoted = (
        'og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
        'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term', 'utm_content',
        'links',
    )
    for field in promoted:
        if field in custom_fields:
            post[field] = custom_fields[field]

    if 'links' not in custom_fields:
        collected = []
        for index in range(10):
            numbered = 'links_%s' % index
            if numbered not in custom_fields:
                continue
            value = custom_fields[numbered]
            # A bare string is just a URL; otherwise it is a (url, label)
            # sequence.
            if isinstance(value, basestring):
                entry = {'url': value}
            else:
                entry = {'url': value[0], 'label': value[1]}
            collected.append(entry)
        post['links'] = collected

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'featured_topic', '_id': post['slug'], '_source': post}
def process_history(item):
    """
    Reshape a history record into an indexable document.

    Removes ``comments`` and ``custom_fields``, copies ``slug`` to
    ``_id`` and records ``has_parent``. Records with a non-zero
    ``parent`` (individual history points) keep ``item_date``; records
    without one (history sections) keep ``section_date_from`` and
    ``section_date_to``. Only non-empty custom-field values are copied.

    Returns a dict with ``_type`` ('history'), ``_id`` and ``_source``.
    """
    del item['comments']
    item['_id'] = item['slug']

    is_history_point = item['parent'] != 0
    item['has_parent'] = is_history_point

    custom_fields = item['custom_fields']
    if is_history_point:
        date_fields = ('item_date',)
    else:
        date_fields = ('section_date_from', 'section_date_to')
    for field in date_fields:
        value = custom_fields.get(field)
        if value:
            item[field] = value

    del item['custom_fields']

    item = process_external_links(item)

    return {'_type': 'history', '_id': item['slug'], '_source': item}
Exemple #7
0
def process_post(post):
    """
    Reshape a newsroom post into an indexable document.

    Removes ``comments`` and ``custom_fields``, copies ``slug`` to
    ``_id``, derives ``category``, ``author`` and ``tags`` from the
    taxonomy lists and promotes selected custom fields to the top level.
    Authors are omitted when the post is in the 'Press Release' category.
    Author and tag names keep only alphanumerics, whitespace and hyphens.

    Returns a dict with ``_type`` ('newsroom'), ``_id`` and ``_source``.
    """
    def _strip_symbols(text):
        # Remove every disallowed character in one pass. The earlier loop
        # always replaced from the original string, so only the last
        # offending character was actually removed.
        return ''.join(
            ch for ch in text
            if ch.isalnum() or ch.isspace() or ch == '-'
        )

    del post['comments']
    post['_id'] = post['slug']
    # Decode the HTML-escaped ampersand (the old '&' -> '&' was a no-op).
    post['category'] = [
        cat['title'].replace('&amp;', '&')
        for cat in post['taxonomy_cfpb_newsroom_cat_taxonomy']
    ]
    is_press_release = 'Press Release' in post['category']
    post['author'] = [
        _strip_symbols(author['title'])
        for author in post['taxonomy_fj_author']
        if not is_press_release
    ]
    post['tags'] = [
        _strip_symbols(tag['title']) for tag in post['taxonomy_fj_tag']
    ]

    custom_fields = post['custom_fields']
    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'dsq_thread_id', 'alt_title']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'newsroom',
            '_id': post['slug'],
            '_source': post}
def process_orgmember(member):
    """
    Reshape an org-member record into an indexable document.

    Works in place on ``member``: removes ``comments`` and
    ``custom_fields``, sets ``_id`` from ``slug`` and ``has_parent``
    from ``parent``, and lifts ``category``, ``name`` and ``titles``
    from the taxonomy / custom fields before handing the record to
    ``process_external_links``.

    Returns a dict with ``_type`` ('orgmember'), ``_id`` and ``_source``.
    Raises KeyError when a required key is missing from ``member``.
    """
    del member['comments']
    member['_id'] = member['slug']
    member['has_parent'] = member['parent'] != 0

    if member['taxonomy_orgmember_cat']:
        first_category = member['taxonomy_orgmember_cat'][0]
        # Decode the HTML-escaped ampersand. Replacing '&' with itself,
        # as the code did before, had no effect.
        member['category'] = first_category['title'].replace('&amp;', '&')

    custom_fields = member['custom_fields']
    member['name'] = custom_fields.get('name') or ''

    titles = custom_fields.get('titles')
    if not titles:
        # Fall back to the numbered title fields, skipping empty ones.
        titles = [custom_fields.get(key)
                  for key in ('titles_0', 'titles_1')
                  if custom_fields.get(key)]
    member['titles'] = titles
    del member['custom_fields']

    member = process_external_links(member)

    return {'_type': 'orgmember',
            '_id': member['slug'],
            '_source': member}
def process_post(post):
    """
    Reshape an FAQ post into an indexable document.

    Sets ``_id`` from ``slug``, promotes selected custom fields to the
    top level, builds ``tags`` from ``taxonomy_fj_tag`` when that key is
    present (keeping only alphanumerics, whitespace and hyphens), drops
    ``custom_fields`` and runs ``process_external_links``.

    Returns a dict with ``_type`` ('faq'), ``_id`` and ``_source``.
    """
    post['_id'] = post['slug']

    custom_fields = post['custom_fields']
    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'faq']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'taxonomy_fj_tag' in post:
        # Filter every disallowed character at once. The earlier loop
        # replaced from the original tag on each pass, so only the last
        # offending character was removed.
        post['tags'] = [
            ''.join(ch for ch in tag['title']
                    if ch.isalnum() or ch.isspace() or ch == '-')
            for tag in post['taxonomy_fj_tag']
        ]

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'faq',
            '_id': post['slug'],
            '_source': post}
def process_post(post):
    """
    Reshape a featured-topic post into an indexable document.

    ``_id`` comes from ``slug`` and ``author`` becomes a single-item
    list with the author's name. Selected custom fields are copied to
    the top level; when there is no ``links`` custom field, ``links`` is
    assembled from ``links_0`` .. ``links_9``. ``custom_fields`` is then
    removed and the post goes through ``process_external_links``.

    Returns a dict with ``_type`` ('featured_topic'), ``_id``, ``_source``.
    """
    post['_id'] = post['slug']
    post['author'] = [post['author']['name']]

    fields = post['custom_fields']
    for field_name in ('og_title', 'og_image', 'og_desc', 'twtr_text',
                       'twtr_lang', 'twtr_rel', 'twtr_hash', 'utm_campaign',
                       'utm_term', 'utm_content', 'links'):
        if field_name in fields:
            post[field_name] = fields[field_name]

    if 'links' not in fields:
        gathered = []
        for position in range(10):
            link_key = 'links_%s' % position
            if link_key not in fields:
                continue
            raw_link = fields[link_key]
            # Plain strings carry only a URL; sequences are (url, label).
            if isinstance(raw_link, basestring):
                gathered.append({'url': raw_link})
            else:
                gathered.append({'url': raw_link[0], 'label': raw_link[1]})
        post['links'] = gathered

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'featured_topic',
            '_id': post['slug'],
            '_source': post}
Exemple #11
0
def process_event(event):
    """
    Reshape a calendar event into an indexable document.

    Copies ``id`` to ``_id``, converts ``dtstart`` / ``dtend`` to
    America/New_York and stores them as ISO-8601 strings (``date``
    mirrors ``dtstart``), stores the start's calendar date in ``day``,
    and trims ``description`` (removing it when only whitespace remains).

    Returns a dict with ``_type`` ('calendar_event'), ``_id``, ``_source``.
    """
    event['_id'] = event['id']
    start = dateutil.parser.parse(event['dtstart'])

    # TODO: The times passed in are correct & in ET,
    # but we want to treat them like we're treating the incorrect times
    # in our db so that the fix for that is consistent. We can fix this
    # when Wagtail fixes https://github.com/torchbox/wagtail/issues/2406
    # along with https://github.com/cfpb/cfgov-refresh/pull/1661
    start = start.astimezone(timezone('America/New_York'))
    event['date'] = event['dtstart'] = start.isoformat()

    end = dateutil.parser.parse(event['dtend'])
    event['dtend'] = end.astimezone(timezone('America/New_York')).isoformat()

    event['day'] = start.date()

    description = event['description']
    if description:
        trimmed = description.strip()
        if trimmed == '':
            del event['description']
        else:
            event['description'] = trimmed

    event = process_external_links(event)

    return {'_type': 'calendar_event', '_id': event['id'], '_source': event}
def process_contact(contact):
    """
    Reshape a contact record into an indexable document.

    Removes ``comments`` and ``custom_fields``, copies ``slug`` to
    ``_id`` and lifts contact details out of the custom fields:

    * ``email`` / ``phone``: the combined custom field when present,
      otherwise a list built from up to three numbered groups
      (``<name>_<i>_addr`` or ``<name>_<i>_num``, plus
      ``<name>_<i>_desc``); empty groups are skipped.
    * ``fax``: the combined custom field when present, otherwise a dict
      built from ``fax_num`` / ``fax_desc`` (omitted when both are
      absent).
    * address and description fields are copied verbatim.
    * ``web``: taken from ``web`` (or ``web_0`` as a fallback) and
      wrapped as ``{'url': ...}`` unless it already contains both
      ``url`` and ``label``.

    Returns a dict with ``_type`` ('contact'), ``_id`` and ``_source``.
    """
    del contact['comments']
    contact['_id'] = contact['slug']
    custom_fields = contact['custom_fields']

    for name in ('email', 'phone', 'fax'):
        if name in custom_fields:
            contact[name] = custom_fields[name]
        elif name == 'fax':  # was ``is 'fax'``: identity, not equality
            fax = {}
            if 'fax_num' in custom_fields:
                fax['num'] = custom_fields['fax_num']
            if 'fax_desc' in custom_fields:
                fax['desc'] = custom_fields['fax_desc']
            if fax:
                contact[name] = fax
            else:
                # Match the earlier behaviour of leaving no ``fax`` key.
                contact.pop(name, None)
        else:
            entries = []
            for i in range(3):
                # Build each entry separately and append only non-empty
                # ones. Indexing the list by ``i`` broke (IndexError) as
                # soon as an earlier empty entry had been dropped.
                entry = {}
                addr_key = '%s_%s_addr' % (name, i)
                num_key = '%s_%s_num' % (name, i)
                desc_key = '%s_%s_desc' % (name, i)
                if addr_key in custom_fields:
                    entry['addr'] = custom_fields[addr_key]
                elif num_key in custom_fields:
                    entry['num'] = custom_fields[num_key]
                if desc_key in custom_fields:
                    entry['desc'] = custom_fields[desc_key]
                if entry:
                    entries.append(entry)
            contact[name] = entries

    names = [
        'sitewide_desc', 'attn', 'street', 'city', 'state', 'zip_code',
        'addr_desc'
    ]
    for name in names:
        if name in custom_fields:
            contact[name] = custom_fields[name]

    # ``'web' and 'web_0' in d`` only tested 'web_0', so the fallback
    # below could never run; test both keys explicitly.
    if 'web' in custom_fields and 'web_0' in custom_fields:
        del custom_fields['web_0']
    elif 'web_0' in custom_fields:
        custom_fields['web'] = custom_fields['web_0']
    if 'web' in custom_fields:
        web = custom_fields['web']
        # NOTE(review): if ``web`` is a plain string these are substring
        # checks rather than key checks -- confirm the expected type.
        if 'url' not in web or 'label' not in web:
            contact['web'] = {'url': web}
        else:
            contact['web'] = web

    del contact['custom_fields']

    contact = process_external_links(contact)

    return {'_type': 'contact', '_id': contact['slug'], '_source': contact}
def process_view(post):
    """
    Reshape a view post into an indexable document.

    Sets ``_id`` from ``slug``; limits ``popular_posts`` to five items
    (a single string becomes a one-item list); normalises
    ``related_links`` into a list of ``{'url', 'label'}`` dicts; fetches
    the related hero over HTTP and stores it under ``hero`` when the
    response reports status 'ok'; promotes selected custom fields;
    removes ``custom_fields`` and runs ``process_external_links``.

    Returns a dict with ``_type`` ('views'), ``_id`` and ``_source``.
    """
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    # limit popular posts to five items
    if 'popular_posts' in custom_fields:
        popular = custom_fields['popular_posts']
        if isinstance(popular, basestring):
            post['popular_posts'] = [popular]
        else:
            post['popular_posts'] = list(popular[:5])

    # convert related links into a proper list
    if 'related_links' in custom_fields:
        post['related_links'] = custom_fields['related_links']
    else:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key in custom_fields:
                link = custom_fields[key]
                if isinstance(link, basestring):
                    related.append({'url': link})
                else:
                    related.append({'url': link[0], 'label': link[1]})
        post['related_links'] = related

    # append the hero information
    if 'related_hero' in custom_fields:
        related_hero = custom_fields['related_hero']
        if isinstance(related_hero, basestring):
            hero_id = related_hero
        else:
            hero_id = related_hero[0]
        if hero_id:
            hero_url = os.path.expandvars(
                '$WORDPRESS/hero/' + hero_id + '/?json=1')
            response = requests.get(hero_url)
            hero_data = json.loads(response.content)
            # Compare by value: ``is 'ok'`` tested object identity, which
            # is not reliably true for a string decoded from JSON, so the
            # hero was silently dropped.
            if hero_data['status'] == 'ok':
                hero_data = hero_data['post']
                if 'related_post' in hero_data['custom_fields']:
                    hero_data['related_posts'] = [
                        p for p in hero_data['custom_fields']['related_post']
                        if p
                    ]
                post['hero'] = hero_data

    # convert other custom fields
    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'alt_title']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'views',
            '_id': post['slug'],
            '_source': post}
def process_post(page):
    """
    Reshape a report page into an indexable document.

    Drops ``comments``, copies ``id`` to ``_id`` and runs the page
    through ``process_external_links``.

    Returns a dict with ``_type`` ('report'), ``_id`` and ``_source``.
    """
    del page['comments']
    page['_id'] = page['id']

    processed = process_external_links(page)
    document = {
        '_type': 'report',
        '_id': processed['id'],
        '_source': processed,
    }
    return document
    def test_applies_convert_http_image_links(self):
        """Processing ``self.doc`` must call ``convert_http_image_links``
        once per value 'a' through 'i', each time with the expected URL
        mappings; call order is not checked."""
        s3_mappings = [
            ('http://foo.bucket/', 'https://s3.amazonaws.com/foo.bucket/'),
        ]
        wanted = [call(letter, s3_mappings) for letter in 'abcdefghi']
        patched = patch(
            'sheerlike.external_links.convert_http_image_links',
            return_value='html'
        )

        with patched as convert_mock:
            process_external_links(self.doc)
            convert_mock.assert_has_calls(wanted, any_order=True)
def process_post(page):
    """
    Reshape a page into an indexable document.

    Drops ``comments``, copies ``id`` to ``_id`` and runs the page
    through ``process_external_links``.

    Returns a dict with ``_type`` ('pages'), ``_id`` and ``_source``.
    """
    del page['comments']
    page['_id'] = page['id']

    cleaned_page = process_external_links(page)
    result = {'_type': 'pages'}
    result['_id'] = cleaned_page['id']
    result['_source'] = cleaned_page
    return result
def process_sub_page(post):
    """
    Reshape a sub-page post into an indexable document.

    Removes ``comments`` and ``custom_fields``, copies ``slug`` to
    ``_id``, promotes selected custom fields, normalises ``related_faq``
    to a single value, builds ``related_links`` from the numbered
    ``related_links_N`` fields when none was promoted, derives ``tags``
    from ``taxonomy_fj_tag`` (keeping only alphanumerics, whitespace and
    hyphens) and sets ``has_parent`` from ``parent``.

    Returns a dict with ``_type`` ('sub_page'), ``_id`` and ``_source``.
    """
    del post['comments']
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    names = [
        'og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
        'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term', 'utm_content',
        'show_in_office', 'use_filtered_feed', 'use_form', 'body_content',
        'related_links', 'short_title', 'preview_text'
    ]
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'related_faq' in custom_fields:
        related_faq = custom_fields['related_faq']
        if isinstance(related_faq, basestring):
            post['related_faq'] = related_faq
        else:
            post['related_faq'] = related_faq[0]

    if 'related_links' not in post:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key in custom_fields:
                related.append({
                    'url': custom_fields[key][0],
                    'label': custom_fields[key][1]
                })
        if related:
            post['related_links'] = related
    del post['custom_fields']

    if 'taxonomy_fj_tag' in post:
        # Filter all disallowed characters in one pass. The old loop
        # replaced from the original tag each time, so only the last
        # offending character was removed.
        post['tags'] = [
            ''.join(ch for ch in tag['title']
                    if ch.isalnum() or ch.isspace() or ch == '-')
            for tag in post['taxonomy_fj_tag']
        ]

    post['has_parent'] = post['parent'] != 0

    post = process_external_links(post)

    return {'_type': 'sub_page', '_id': post['slug'], '_source': post}
Exemple #18
0
def process_career(career):
    """
    Reshape a career record into an indexable document.

    Copies ``id`` to ``_id``, converts non-empty ``salary_max`` /
    ``salary_min`` values to floats and, for each applicant type that
    has an ``application_type``, renames its ``applicant_type`` key to
    ``name``. The record then goes through ``process_external_links``.

    Returns a dict with ``_type`` ('career'), ``_id`` and ``_source``.
    """
    career['_id'] = career['id']

    # Decide which salary bounds to convert before changing any of them.
    salary_keys = [
        key for key in ('salary_max', 'salary_min') if career[key]
    ]
    for key in salary_keys:
        career[key] = float(career[key])

    if 'applicant_types' in career:
        for entry in career['applicant_types']:
            if 'application_type' not in entry:
                continue
            application = entry['application_type']
            application['name'] = application['applicant_type']
            del application['applicant_type']

    career = process_external_links(career)
    return {'_type': 'career', '_id': career['id'], '_source': career}
def process_career(career):
    """
    Reshape a career record into an indexable document.

    ``_id`` is copied from ``id``; ``salary_max`` and ``salary_min`` are
    cast to float when they hold a truthy value; within every applicant
    type that has an ``application_type``, ``applicant_type`` is renamed
    to ``name``. Finally ``process_external_links`` is applied.

    Returns a dict with ``_type`` ('career'), ``_id`` and ``_source``.
    """
    career['_id'] = career['id']

    # Both bounds are checked first, then converted.
    to_convert = []
    for bound in ('max', 'min'):
        field = 'salary_' + bound
        if career[field]:
            to_convert.append(field)
    for field in to_convert:
        career[field] = float(career[field])

    for applicant in career.get('applicant_types', ()):
        if 'application_type' in applicant:
            details = applicant['application_type']
            details['name'] = details['applicant_type']
            del details['applicant_type']

    career = process_external_links(career)
    return {'_type': 'career',
            '_id': career['id'],
            '_source': career}
Exemple #20
0
def process_post(post):
    """
    Reshape a blog post into an indexable document.

    Removes ``comments`` and ``custom_fields``, copies ``slug`` to
    ``_id``, derives ``blog_category``, ``author`` and ``tags`` from the
    taxonomy lists, fixes ``category`` to ``['Blog']``, promotes selected
    custom fields, normalises ``related_hero`` to a single value and
    builds ``related_links`` from the numbered ``related_links_N`` fields
    when none was promoted. Author and tag names keep only
    alphanumerics, whitespace and hyphens.

    Returns a dict with ``_type`` ('posts'), ``_id`` and ``_source``.
    """
    def _strip_symbols(text):
        # Drop all disallowed characters in one pass. The old loop
        # replaced from the original string on each iteration, so only
        # the last offending character was removed.
        return ''.join(
            ch for ch in text
            if ch.isalnum() or ch.isspace() or ch == '-'
        )

    del post['comments']
    post['_id'] = post['slug']
    # Decode the HTML-escaped ampersand (the old '&' -> '&' was a no-op).
    post['blog_category'] = [cat['title'].replace('&amp;', '&')
                             for cat in post['taxonomy_fj_category']]
    post['category'] = ['Blog']
    post['author'] = [_strip_symbols(author['title'])
                      for author in post['taxonomy_fj_author']]
    post['tags'] = [_strip_symbols(tag['title'])
                    for tag in post['taxonomy_fj_tag']]

    custom_fields = post['custom_fields']
    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'alt_title', 'popular_posts',
             'show_featured_image_in_post', 'display_in_newsroom',
             'related_links', 'dsq_needs_sync', 'dsq_thread_id']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'related_hero' in custom_fields:
        related_hero = custom_fields['related_hero']
        if isinstance(related_hero, basestring):
            post['related_hero'] = related_hero
        else:
            post['related_hero'] = related_hero[0]

    if 'related_links' not in post:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key in custom_fields:
                related.append({'url': custom_fields[key][0],
                                'label': custom_fields[key][1]})
        post['related_links'] = related

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'posts',
            '_id': post['slug'],
            '_source': post}
def process_sub_page(post):
    """
    Reshape a sub-page post into an indexable document.

    Works in place on ``post``: drops ``comments`` and ``custom_fields``,
    sets ``_id`` from ``slug``, copies selected custom fields to the top
    level, reduces ``related_faq`` to a single value, assembles
    ``related_links`` from ``related_links_0`` .. ``related_links_4``
    when no ``related_links`` was copied, builds ``tags`` from
    ``taxonomy_fj_tag`` (only alphanumerics, whitespace and hyphens are
    kept) and sets ``has_parent`` from ``parent``.

    Returns a dict with ``_type`` ('sub_page'), ``_id`` and ``_source``.
    """
    def _strip_symbols(text):
        # Remove every disallowed character. Previously each replacement
        # started again from the original tag, so all but the last
        # offending character survived.
        return ''.join(
            ch for ch in text
            if ch.isalnum() or ch.isspace() or ch == '-'
        )

    del post['comments']
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'show_in_office', 'use_filtered_feed', 'use_form',
             'body_content', 'related_links', 'short_title', 'preview_text']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    if 'related_faq' in custom_fields:
        related_faq = custom_fields['related_faq']
        if isinstance(related_faq, basestring):
            post['related_faq'] = related_faq
        else:
            post['related_faq'] = related_faq[0]

    if 'related_links' not in post:
        related = []
        for x in range(5):
            key = 'related_links_%s' % x
            if key in custom_fields:
                related.append({'url': custom_fields[key][0],
                                'label': custom_fields[key][1]})
        if related:
            post['related_links'] = related
    del post['custom_fields']

    if 'taxonomy_fj_tag' in post:
        post['tags'] = [_strip_symbols(tag['title'])
                        for tag in post['taxonomy_fj_tag']]

    post['has_parent'] = post['parent'] != 0

    post = process_external_links(post)

    return {'_type': 'sub_page',
            '_id': post['slug'],
            '_source': post}
def process_history(item):
    """
    Reshape a history record into an indexable document.

    ``comments`` and ``custom_fields`` are removed and ``_id`` is copied
    from ``slug``. A record with a non-zero ``parent`` is an individual
    history point and keeps its ``item_date``; otherwise it is a history
    section and keeps ``section_date_from`` / ``section_date_to``. Empty
    custom-field values are not copied.

    Returns a dict with ``_type`` ('history'), ``_id`` and ``_source``.
    """
    del item['comments']
    item['_id'] = item['slug']

    item['has_parent'] = item['parent'] != 0
    extras = item['custom_fields']

    if item['has_parent']:
        # individual history point
        wanted = ['item_date']
    else:
        # history section
        wanted = ['section_date_from', 'section_date_to']
    for key in wanted:
        found = extras.get(key)
        if found:
            item[key] = found

    del item['custom_fields']

    item = process_external_links(item)

    return {'_type': 'history', '_id': item['slug'], '_source': item}
 def test_converts_http_s3_link(self):
     """An http image URL on foo.bucket is rewritten to its https S3
     equivalent."""
     source_html = '<img src="http://foo.bucket/img.png"/>'
     expected_html = (
         '<img src="https://s3.amazonaws.com/foo.bucket/img.png"/>'
     )
     actual_html = process_external_links(source_html)
     self.assertEqual(actual_html, expected_html)
def process_office(post):
    """
    Reshape an office post into an indexable document.

    Sets ``_id`` from ``slug`` and builds these keys from the custom
    fields: ``intro`` (from the ``intro_*`` fields), ``top_story`` (head,
    description and links), ``resources`` (a list of up to four resource
    dicts), the promoted social / tracking fields, and the
    ``related_hero`` / ``related_contact`` / ``related_faq`` references
    (each reduced to a single value). ``tags`` comes from
    ``taxonomy_fj_tag`` and keeps only alphanumerics, whitespace and
    hyphens. ``custom_fields`` is removed and the post is passed through
    ``process_external_links``.

    Returns a dict with ``_type`` ('office'), ``_id`` and ``_source``.
    """
    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    # get intro text & subscribe form data from custom fields
    intro = {}
    for attr in ['intro_text', 'intro_subscribe_form',
                 'intro_govdelivery_code']:
        if attr in custom_fields:
            intro[attr.replace('intro_', '')] = custom_fields[attr]
    if intro:
        post['intro'] = intro

    # build top story dict
    top_story = {}
    for attr in ['top_story_head', 'top_story_desc']:
        if attr in custom_fields:
            top_story[attr.replace('top_story_', '')] = custom_fields[attr]

    # convert top story links into a proper list
    if 'top_story_links' in custom_fields:
        top_story['links'] = custom_fields['top_story_links']
    else:
        top_story_links = []
        for x in range(5):
            key = 'top_story_links_%s' % x
            if key in custom_fields:
                top_story_links.append({'url': custom_fields[key][0],
                                        'label': custom_fields[key][1]})

        if top_story_links:
            top_story['links'] = top_story_links

    if top_story:
        post['top_story'] = top_story

    # create list of office resource dicts
    if 'resources' in custom_fields:
        post['resources'] = custom_fields['resources']
    else:
        post['resources'] = []
        for x in range(4):
            resource = {}
            for field in ['title', 'desc', 'icon', 'link']:
                field_name = 'resource_%s_%s' % (str(x), field)
                if field_name in custom_fields and custom_fields[field_name]:
                    if field == 'link':
                        resource[field] = \
                            {'url': custom_fields[field_name][0],
                             'label': custom_fields[field_name][1]}
                    else:
                        resource[field] = custom_fields[field_name]

            if resource:
                post['resources'].append(resource)

    # add other custom fields
    names = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
             'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term',
             'utm_content', 'short_title', 'related_sub_pages']
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    for related in ['related_hero', 'related_contact', 'related_faq']:
        if related in custom_fields:
            if isinstance(custom_fields[related], basestring):
                post[related] = custom_fields[related]
            else:
                post[related] = custom_fields[related][0]

    # Filter all disallowed characters in one pass. The old loop replaced
    # from the original tag each time, so only the last offending
    # character was removed.
    post['tags'] = [
        ''.join(ch for ch in tag['title']
                if ch.isalnum() or ch.isspace() or ch == '-')
        for tag in post['taxonomy_fj_tag']
    ]

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'office',
            '_id': post['slug'],
            '_source': post}
def process_office(post):
    """
    Reshape an office post into an indexable document.

    Works in place on ``post``: ``_id`` is copied from ``slug``;
    ``intro``, ``top_story`` and ``resources`` are assembled from the
    custom fields; selected social / tracking fields are promoted;
    ``related_hero``, ``related_contact`` and ``related_faq`` are each
    reduced to a single value; ``tags`` is built from
    ``taxonomy_fj_tag`` with everything except alphanumerics, whitespace
    and hyphens removed. ``custom_fields`` is dropped and the post is
    passed through ``process_external_links``.

    Returns a dict with ``_type`` ('office'), ``_id`` and ``_source``.
    """
    def _strip_symbols(text):
        # Remove every disallowed character. Previously each replacement
        # restarted from the original tag, so all but the last offending
        # character survived.
        return ''.join(
            ch for ch in text
            if ch.isalnum() or ch.isspace() or ch == '-'
        )

    post['_id'] = post['slug']
    custom_fields = post['custom_fields']

    # get intro text & subscribe form data from custom fields
    intro = {}
    for attr in [
            'intro_text', 'intro_subscribe_form', 'intro_govdelivery_code'
    ]:
        if attr in custom_fields:
            new_attr = attr.replace('intro_', '')
            intro[new_attr] = custom_fields[attr]
    if intro:
        post['intro'] = intro

    # build top story dict
    top_story = {}
    for attr in ['top_story_head', 'top_story_desc']:
        if attr in custom_fields:
            new_attr = attr.replace('top_story_', '')
            top_story[new_attr] = custom_fields[attr]

    # convert top story links into a proper list
    if 'top_story_links' in custom_fields:
        top_story['links'] = custom_fields['top_story_links']
    else:
        top_story_links = []
        for x in range(5):
            key = 'top_story_links_%s' % x
            if key in custom_fields:
                top_story_links.append({
                    'url': custom_fields[key][0],
                    'label': custom_fields[key][1]
                })

        if top_story_links:
            top_story['links'] = top_story_links

    if top_story:
        post['top_story'] = top_story

    # create list of office resource dicts
    if 'resources' in custom_fields:
        post['resources'] = custom_fields['resources']
    else:
        post['resources'] = []
        for x in range(4):
            resource = {}
            for field in ['title', 'desc', 'icon', 'link']:
                field_name = 'resource_%s_%s' % (str(x), field)
                if field_name in custom_fields and custom_fields[field_name]:
                    if field == 'link':
                        resource[field] = {
                            'url': custom_fields[field_name][0],
                            'label': custom_fields[field_name][1]
                        }
                    else:
                        resource[field] = custom_fields[field_name]

            if resource:
                post['resources'].append(resource)

    # add other custom fields
    names = [
        'og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_lang',
        'twtr_rel', 'twtr_hash', 'utm_campaign', 'utm_term', 'utm_content',
        'short_title', 'related_sub_pages'
    ]
    for name in names:
        if name in custom_fields:
            post[name] = custom_fields[name]

    for related in ['related_hero', 'related_contact', 'related_faq']:
        if related in custom_fields:
            if isinstance(custom_fields[related], basestring):
                post[related] = custom_fields[related]
            else:
                post[related] = custom_fields[related][0]

    post['tags'] = [
        _strip_symbols(tag['title']) for tag in post['taxonomy_fj_tag']
    ]

    del post['custom_fields']

    post = process_external_links(post)

    return {'_type': 'office', '_id': post['slug'], '_source': post}
Exemple #26
0
def process_event(event):
    """
    Process an event as provided by the WordPress API and return
    JSON suitable for indexing in Elasticsearch.

    The event is reshaped as follows: ``comments``, ``custom_fields`` and
    every ``taxonomy*`` key are removed; ``_id`` is copied from ``slug``;
    ``tags`` is built from ``taxonomy_fj_tag`` (keeping only
    alphanumerics, whitespace and hyphens); non-empty social / tracking
    custom fields are grouped under ``open_graph``; non-empty event
    custom fields are promoted; ``beginning_time`` / ``ending_time`` are
    taken from the first entry of the matching taxonomy; and an ``ics``
    dict is assembled from the title, venue, times, ``date`` and
    ``relative_url``. The keys are then sorted into an ``OrderedDict``
    and passed through ``process_external_links``.

    Returns a dict with ``_type`` ('events'), ``_id`` and ``_source``.
    """
    del event['comments']
    event['_id'] = event['slug']
    custom_fields = event['custom_fields']

    # Reassign data out of custom fields
    # Filter all disallowed characters in one pass; the old loop replaced
    # from the original tag each time, so only the last offending
    # character was removed. ``or []`` tolerates an empty/None taxonomy,
    # which the previous per-item filter could not.
    event['tags'] = [
        ''.join(ch for ch in tag['title']
                if ch.isalnum() or ch.isspace() or ch == '-')
        for tag in (event['taxonomy_fj_tag'] or [])
    ]
    event['open_graph'] = {}
    og_fields = ['og_title', 'og_image', 'og_desc', 'twtr_text', 'twtr_rel',
                 'twtr_lang', 'twtr_hash', 'utm_campaign', 'utm_term',
                 'utm_content']
    event_fields = ['rsvp', 'agenda', 'venue', 'archive', 'live', 'future',
                    'live_stream']
    for field in og_fields:
        if field in custom_fields and custom_fields[field]:
            event['open_graph'][field] = custom_fields[field]
    for field in event_fields:
        if field in custom_fields and custom_fields[field]:
            event[field] = custom_fields[field]

    if 'taxonomy_beginning_time' in event and event['taxonomy_beginning_time']:
        beginning = event['taxonomy_beginning_time'][0]
        event['beginning_time'] = {
            'date': beginning['title'],
            'timezone': beginning['description'],
        }
    if 'taxonomy_ending_time' in event and event['taxonomy_ending_time']:
        ending = event['taxonomy_ending_time'][0]
        event['ending_time'] = {
            'date': ending['title'],
            'timezone': ending['description'],
        }

    # Create ICS data dictionary
    event['ics'] = {}
    if 'title' in event:
        event['ics']['summary'] = event['title']
    if 'venue' in event:
        venue = event['venue']
        # ``'city' and 'state' in venue`` only tested 'state' and could
        # raise KeyError on a venue without a city; test both keys.
        if 'city' in venue and 'state' in venue:
            event['ics']['location'] = "%s, %s" % (venue['city'],
                                                   venue['state'])
    if 'beginning_time' in event:
        event['ics']['dtstart'] = event['beginning_time']['date']
        event['ics']['starting_tzinfo'] = event['beginning_time']['timezone']
    if 'ending_time' in event:
        event['ics']['dtend'] = event['ending_time']['date']
        event['ics']['ending_tzinfo'] = event['ending_time']['timezone']
    ics_dict = {'date': 'dtstamp', 'relative_url': 'uid'}
    for wp_field, ics_field in ics_dict.items():
        if wp_field in event and event[wp_field]:
            event['ics'][ics_field] = event[wp_field]

    # Delete taxonomy data and custom fields
    del event['custom_fields']
    # Collect the keys first so the dict is never resized while it is
    # being iterated.
    for key in [k for k in event if k.startswith('taxonomy')]:
        del event[key]

    event = OrderedDict(sorted(event.items(), key=lambda k: k[0]))

    event = process_external_links(event)

    return {'_type': 'events',
            '_id': event['slug'],
            '_source': event}
 def test_converts_http_s3_link(self):
     """An http image URL on foo.bucket is rewritten to its https S3
     equivalent."""
     html_in = '<img src="http://foo.bucket/img.png"/>'
     html_out = '<img src="https://s3.amazonaws.com/foo.bucket/img.png"/>'
     self.assertEqual(process_external_links(html_in), html_out)