Example #1
def get_index_buttons(song_type):
    # data to be returned as json
    json_data = {}
    
    # create path
    path = SONG_CATEGORIES_PATH_FORMAT % song_type
    
    # make http GET request
    r = requests.get(URL_FORMAT % path)
    log('index buttons request sent for: %s' % path)
        
    # create BeautifulSoup object out of html content
    soup = BeautifulSoup(r.content, "html.parser")
    
    # extract index button div
    index_buttons = soup.find('div', {'class' : 'index-buttons'})
    log('index buttons found')
    
    # extract all links from the div
    links = Utils.extract_links(index_buttons)
    
    # add links to json_data to be returned
    json_data[BUTTONS] = links

    return json.dumps(json_data, sort_keys=False)
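
A note on the helper used above: Utils.extract_links is called throughout these examples but is not shown on this page. Judging from its call sites and from the unit test in Example #2 (findAll('a') on the container, text.strip() for the name, get('href') for the path, and an optional clear_children pass), a minimal sketch could look like the following. The default key names, the staticmethod layout, and the placeholder clear_children are assumptions, not the project's actual code.

class Utils:
    @staticmethod
    def clear_children(element):
        # placeholder only; the real helper's behavior is not shown on this page
        pass

    @staticmethod
    def extract_links(container, name_key='name', path_key='path',
                      should_clear_children=True):
        # collect every anchor tag in the container as a {name, path} dictionary
        links = []
        for anchor in container.findAll('a'):
            if should_clear_children:
                # presumably strips unwanted child markup before the text is read
                Utils.clear_children(anchor)
            links.append({name_key: anchor.text.strip(),
                          path_key: anchor.get('href')})
        return links
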
Example #2
    def test_extract_links(self):

        # set up the mocks
        container_mock = create_autospec(BeautifulSoup)
        element_mock1 = create_autospec(BeautifulSoup)
        element_mock2 = create_autospec(BeautifulSoup)
        element_mock3 = create_autospec(BeautifulSoup)
        element_mock4 = create_autospec(BeautifulSoup)
        
        # populate mocks with data
        i = 1
        for element in [element_mock1, element_mock2, element_mock3, element_mock4]:
            text = create_autospec(str)
            text.strip.return_value = 'element_mock' + str(i)
            element.text = text
            element.get.return_value = 'link' + str(i)
            i += 1
        
        container_mock.findAll.return_value = [element_mock1, element_mock2, element_mock3]

        # mock out clear_children call
        funct = Utils.clear_children
        Utils.clear_children = create_autospec(Utils.clear_children)

        # call without clear_children
        search_results = Utils.extract_links(container_mock, should_clear_children = False)
        
        # call with clear_children
        search_results = Utils.extract_links(container_mock, should_clear_children = True)

        test_results = [{'name' : 'element_mock1', 'path' : 'link1'},
                        {'name' : 'element_mock2', 'path' : 'link2'},
                        {'name' : 'element_mock3', 'path' : 'link3'}]
        assert search_results == test_results
        
        # assert that Utils.clear_children was called with each mock element
        # http://www.voidspace.org.uk/python/mock/mock.html#mock.Mock.assert_has_calls
        # http://www.voidspace.org.uk/python/mock/helpers.html#calls-as-tuples
        calls = [call(element_mock1), call(element_mock2), call(element_mock3)]
        Utils.clear_children.assert_has_calls(calls)
        assert Utils.clear_children.call_count == 3
        
        calls = [call('a'), call('a')]
        container_mock.findAll.assert_has_calls(calls)
        assert container_mock.findAll.call_count == 2
        
        calls = [call(), call()]
        element_mock1.text.strip.assert_has_calls(calls)
        assert element_mock1.text.strip.call_count == 2
        element_mock2.text.strip.assert_has_calls(calls)
        assert element_mock2.text.strip.call_count == 2
        element_mock3.text.strip.assert_has_calls(calls)
        assert element_mock3.text.strip.call_count == 2
        assert not element_mock4.text.strip.called, 'element_mock4.text.strip was called and should not have been'
        
        calls = [call('href'), call('href')]
        element_mock1.get.assert_has_calls(calls)
        assert element_mock1.get.call_count == 2
        element_mock2.get.assert_has_calls(calls)
        assert element_mock2.get.call_count == 2
        element_mock3.get.assert_has_calls(calls)
        assert element_mock3.get.call_count == 2
        assert not element_mock4.get.called, 'element_mock4.get was called and should not have been'
            
        # reset clear_children back to its original value
        Utils.clear_children = funct
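
As an aside, the test above swaps Utils.clear_children out by hand and restores it on the last line, so the patch leaks if an assertion fails earlier. An alternative sketch using unittest.mock.patch.object (with "from unittest.mock import patch" added to the test's imports) undoes the patch automatically. Only the decorator and the signature change; the body stays the same, except that clear_children_mock replaces Utils.clear_children in the assertions and the manual save/restore lines go away.

    @patch.object(Utils, 'clear_children', autospec=True)
    def test_extract_links(self, clear_children_mock):
        # same mock setup, extract_links calls and assertions as above; the
        # patch on Utils.clear_children is reverted automatically when the
        # test returns or raises
        ...
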
Example #3
def get_list_scripture(testament):
    # make testament lower case if it isn't already
    testament = testament.lower()
    
    # data to be returned as json
    json_data = {}
    
    # hymnal.net path to list of scripture songs
    path = SCRIPTURE_PATH_FORMAT % testament

    # make http GET request
    r = requests.get(URL_FORMAT % path)
    log('request sent for: %s' % path)

    # create BeautifulSoup object out of html content
    soup = BeautifulSoup(r.content, "html.parser")

    # find all divs with class 'panel-default', which contain the Bible book names
    book_divs = soup.findAll('div', {'class': 'panel-default'})
    
    # list of bible books with songs about them
    books = []
    
    for book_div in book_divs:
        # book object with attributes book_name and book_content
        book = {}
        
        # name of the book
        book[BOOK_NAME] = book_div.text.strip()

        # list of chapters each containing songs from that chapter
        book_content = []

        # initialize next_div to increment in the loop
        next_div = book_div

        # loop will break when there are no more siblings with verse references
        while True:
            
            # increment to next sibling
            next_div = next_div.next_sibling
            
            try:
                # find div with the verse reference
                verse_ref_div = next_div.find('div',{'class' : 'verse-ref'})
                
                # extract name and link of verse reference
                verse_ref = Utils.extract_links(verse_ref_div, path_key = LINK, should_clear_children = False)[0]
                
                # div of the songs that are from that verse reference
                songs_div = verse_ref_div.next_sibling
                
                # if songs_div is a space or new line, skip and go to next sibling
                if len(str(songs_div).strip()) == 0:
                    songs_div = songs_div.next_sibling
                
                # extract song name and path
                songs = Utils.extract_links(songs_div)

                # chapter dictionary with attributes verse_reference and a list of songs from that verse reference
                chapter = {}
                chapter[VERSE_REF] = verse_ref
                chapter[SONGS] = songs
                book_content.append(chapter)
            except TypeError:
                # indicates that the div was just a space or new line, so skip
                continue
            except AttributeError:
                # indicates that there are no more siblings with verse references, so break out of loop
                break

        book[BOOK_CONTENT] = book_content
        books.append(book)
    
    json_data[BOOKS] = books
    
    return json.dumps(json_data, sort_keys=False)
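
For reference, the response built by get_list_scripture nests as shown below. The actual key strings come from the BOOKS, BOOK_NAME, BOOK_CONTENT, VERSE_REF, SONGS and LINK constants defined elsewhere in the project, and the 'name'/'path' keys inside the link dictionaries are the extract_links defaults seen in the Example #2 test, so read this sketch for the nesting rather than the literal key names.

# illustrative structure only; placeholder values
{
    BOOKS: [
        {
            BOOK_NAME: "<Bible book name>",
            BOOK_CONTENT: [
                {
                    VERSE_REF: {"name": "<verse reference>", LINK: "<path to the verse>"},
                    SONGS: [
                        {"name": "<song name>", "path": "<path to the song>"},
                    ],
                },
            ],
        },
    ],
}
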
Example #4
def get_hymn_internal(hymn_type, hymn_number, additional_args):
    
    # whether or not we need to check if the song exists.
    check_exists = additional_args.get('check_exists', type=bool)

    # remove check_exists from the query parameters; it is our own flag, and any remaining parameters are passed straight through to hymnal.net
    if 'check_exists' in additional_args:
        del additional_args['check_exists']

    # create path by plugging in the hymn type and number and appending all query params
    path = HYMN_PATH_FORMAT % (hymn_type, hymn_number)
    # make http GET request to song path
    r = requests.get(Utils.add_query_to_url(GET_SONG_URL_FORMAT % path, additional_args))
    log('request sent for: %s' % path)
    
    # create BeautifulSoup object out of html content
    soup = BeautifulSoup(r.text, "html.parser")

    # If the song doesn't exist, hymnal.net will randomly generate a song that doesn't make sense.
    # However, it does it at run time, meaning if you request it twice, it'll have a different title.
    if check_exists:
        r2 = requests.get(GET_SONG_URL_FORMAT % path)
        soup2 = BeautifulSoup(r2.content, "html.parser")
        if soup2.title != soup.title:
            message = {Constants.PUBLIC : Constants.NOT_REAL_SONG % (hymn_type, hymn_number)}
            message['status_code'] = 400
            return (json.dumps(message), 400)

    # data to be returned as json
    json_data = {}
    
    # fill in title
    json_data[soup.title.name] = soup.title.string
    
    # extract meta data (Category, Subcategory, etc)
    meta_data = []
    # meta data contained in side bar
    sidebar = soup.find('div',{'class':'sidebar'})
    # info is in divs with common-panel
    meta_data_divs = sidebar.findChildren('div',{'class':'common-panel'})
    for div in meta_data_divs:
        # search by CSS class
        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#searching-by-css-class
        labels = div.find_all('label', class_= 'col-xs-5')
        if len(labels) == 0:
            continue
        for label in labels:
            name = label.text.replace(':','')
            data = Utils.extract_links(label.findNextSibling(), name_key=VALUE)
            
            # append meta data to meta_data list if it doesn't exist already
            meta_data_object = get_meta_data_object(name, data)
            if meta_data_object not in meta_data:
                meta_data.append(meta_data_object)

    svg = extract_svg(soup)
    if svg is not None:
        meta_data.append(svg)

    json_data[META_DATA] = meta_data

    lyrics = []
    raw_lyrics = soup.find('div',{'class':'lyrics'})

    # for the songs with "View Lyrics (external site)"
    if raw_lyrics.find('div',{'class':'alert'}):
        # Only get the numerical number.
        # This is for when there is a new tune, such as #277b. The "b" doesn't matter when it comes to the lyrics.
        hymn_number = re.findall(r"\d+", hymn_number)[0]
        
        with open('stored/classic/{}.html'.format(hymn_number), 'r') as data:
            stored_content = data.read()
        content = re.compile(STORED_CLASSIC_LYRICS_REGEX, re.DOTALL).findall(stored_content)[0]
        # filter out the empty items and grab first non-empty item
        content = [part for part in content if part != ''][0]

        # create BeautifulSoup object out of html content
        external_soup = BeautifulSoup(content, "html.parser")
        
        stanza_content = []
        # indicates which stanza we are currently parsing
        stanza_num = 0

        # find all "div"s, which contains a verse or a chorus
        lyric_divs = external_soup.findAll("div")

        # creates a verse object with the stanza num and content
        verse = {}

        # keep track of the previous chorus so we know not to add it if it appears multiple times in a row
        previous_chorus = []

        for lyric_div in lyric_divs:
            # class name of the div is "verse" or "chorus"
            is_chorus = lyric_div.get("class")[0] == 'chorus'
            
            stanza_content = []
            
            for line in lyric_div.stripped_strings:
                # don't need to include the verse number in the result
                if line.strip().isdigit():
                    continue
                stanza_content.append(line)

            if is_chorus:
                # the previous chorus is the same as the current one, so just reset everything and continue without appending to lyrics
                if previous_chorus == stanza_content:
                    # reset verse object for next verse
                    verse = {}
                    # reset stanza_content for good measure
                    stanza_content = []
                    continue
                else:
                    previous_chorus = stanza_content
                verse[VERSE_TYPE] = CHORUS
            else:
                verse[VERSE_TYPE] = VERSE
            verse[VERSE_CONTENT] = stanza_content

            # append finished stanza to the lyrics list
            lyrics.append(verse)
            # reset verse object for next verse
            verse = {}
            # reset stanza_content for good measure
            stanza_content = []
    else:
        for td in raw_lyrics.findAll('td'):
            stanza_content = []
        
            # skip td if it is empty or is just a number
            if len(td.text.strip()) == 0 or td.text.strip().isdigit():
                continue
 
            # for each line in the stanza, append to stanza list
            for line in td.strings:
                stanza_content.append(line)
            
            # create and populate verse object with verse_type and verse_content
            verse = {}
            if td.get('class') and 'chorus' in td.get('class'):
                verse[VERSE_TYPE] = CHORUS
            elif td.get('class') and 'copyright' in td.get('class'):
                verse[VERSE_TYPE] = OTHER
            elif td.get('class') and 'note' in td.get('class'):
                verse[VERSE_TYPE] = OTHER
            else:
                verse[VERSE_TYPE] = VERSE
            verse[VERSE_CONTENT] = stanza_content

            # append finished stanza to the lyrics list
            lyrics.append(verse)

    if Utils.has_transliteration(hymn_type, hymn_number):
        for lyric in lyrics:
            # split each line into its individual characters, then transliterate each character and join them with spaces
            chars = [list(line) for line in lyric[VERSE_CONTENT]]
            lyric[VERSE_TRANSLITERATION] = [' '.join([pinyin.get(char) for char in char_list]) for char_list in chars]

    json_data[LYRICS] = lyrics

    return json.dumps(json_data, sort_keys=True)
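
The transliteration block near the end of get_hymn_internal splits every lyric line into individual characters and joins each character's pinyin with spaces. A standalone illustration of that comprehension, using made-up lyric lines and assuming the pinyin package from PyPI (whose pinyin.get returns tone-marked pinyin for a Chinese character):

import pinyin

verse_content = ["哈利路亚", "赞美主"]  # hypothetical lyric lines, for illustration only
chars = [list(line) for line in verse_content]
transliteration = [' '.join(pinyin.get(char) for char in char_list)
                   for char_list in chars]
# roughly ["ha li lu ya", "zan mei zhu"] (with tone marks, depending on the
# pinyin package's defaults)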