Ejemplo n.º 1
0
    def assert_mock_get_hymn(self, hymn_type, hymn_number, stored_content_path = None, query_params = tuple()):
        """Run assert_get_hymn with requests.get swapped for a canned response.

        The canned body is read from
        test_data/get_song_html_<hymn_type>_<hymn_number>[query].txt. The fake
        requests.get checks that the query string of the url it is handed
        equals query_params (compared as dicts) and then returns that body.
        """
        # url that would be requested from hymnal.net (built but not compared directly)
        song_path = GetSong.HYMN_PATH_FORMAT % (hymn_type, hymn_number)
        stubbed_url = Utils.add_query_to_url(GetSong.GET_SONG_URL_FORMAT % song_path, query_params)

        # the query string is added to the template first, then the placeholders are filled in
        fixture_template = Utils.add_query_to_url('test_data/get_song_html_{}_{}', query_params) + '.txt'
        fixture_path = fixture_template.format(hymn_type, hymn_number)

        # canned hymnal.net response
        # https://docs.python.org/3/library/unittest.mock.html
        canned_response = Mock()
        with open(fixture_path, 'r') as fixture:
            canned_response.text = fixture.read()

        # parameter order is irrelevant, so the comparison is done on dicts
        def fake_get(url):
            received_query = urllib.parse.urlparse(url).query
            received_params = urllib.parse.parse_qsl(received_query)
            assert_equal(dict(query_params), dict(received_params))
            return canned_response

        # http://stackoverflow.com/questions/15753390/python-mock-requests-and-the-response
        with patch('requests.get', Mock(side_effect=fake_get)):
            self.assert_get_hymn(hymn_type, hymn_number, query_params, stored_content_path)
Ejemplo n.º 2
0
    def assert_get_hymn(self, hymn_type, hymn_number, query_params = tuple(), stored_content_path = None):
        """Request a hymn from the app and compare it with the saved expected JSON.

        The expected result is loaded from
        test_data/get_song_<hymn_type>_<hymn_number>[query].txt and compared
        against the response of GET v2/hymn/<hymn_type>/<hymn_number>[query]
        on title, meta data and lyrics.

        Args:
            hymn_type: hymn type segment of the request path.
            hymn_number: hymn number segment of the request path.
            query_params: (key, value) pairs appended to both the request path
                and the expected-result file name.
            stored_content_path: when truthy, builtins.open is patched for the
                duration of the request so that opening exactly this path
                opens '../' + path instead.
        """
        # checks that two meta data objects are equal
        def check_meta_data(expected, actual):
            assert_equal(len(expected), len(actual))
            for expected_item, actual_item in zip(expected, actual):
                assert_equal(expected_item['name'], actual_item['name'])
                # don't check value of 'See Also' field because it changes every request
                if expected_item['name'] != 'See Also':
                    assert_equal(expected_item['data'], actual_item['data'])

        # checks that two lyrics objects are equal
        def check_lyrics(expected, actual):
            assert_equal(len(expected), len(actual))
            for expected_verse, actual_verse in zip(expected, actual):
                assert_equal(expected_verse['verse_type'], actual_verse['verse_type'])
                assert_equal(expected_verse['verse_content'], actual_verse['verse_content'])
                if 'transliteration' in expected_verse:
                    assert_equal(expected_verse['transliteration'], actual_verse['transliteration'])
                else:
                    assert_false('transliteration' in actual_verse)

        # open saved test data
        expected_result_path = Utils.add_query_to_url('test_data/get_song_{}_{}'.format(hymn_type, hymn_number), query_params)
        expected_result_path += '.txt'
        with open(expected_result_path, 'r') as e:
            expected_result = json.loads(e.read())

        # build the request path
        path = 'v2/hymn/{}/{}'.format(hymn_type, hymn_number)
        path = Utils.add_query_to_url(path, query_params)

        # keep a reference to the real open before it gets patched
        original_open_method = open

        # stands in for open(...) while the app handles the request; every
        # argument after the path (mode, encoding, ...) is forwarded untouched
        # so the app's own open() options are honoured
        def mocked_open_method(path, *args, **kwargs):
            if path == stored_content_path:
                # redirect the stored content file one directory up
                path = '../' + path
            return original_open_method(path, *args, **kwargs)

        if stored_content_path:
            with patch('builtins.open', Mock(side_effect=mocked_open_method)):
                rv = self.app.get(path)
        else:
            rv = self.app.get(path)

        actual_result = json.loads(rv.get_data(as_text=True))
        # assert that components are equal
        assert_equal(expected_result['title'], actual_result['title'])
        check_meta_data(expected_result['meta_data'], actual_result['meta_data'])
        check_lyrics(expected_result['lyrics'], actual_result['lyrics'])
Ejemplo n.º 3
0
    def test_add_query_to_url(self):
        """Check Utils.add_query_to_url with no, one, and several query pairs."""
        base_url = 'hymn'

        # empty query: url comes back unchanged
        assert_equal('hymn', Utils.add_query_to_url(base_url, tuple()))

        # a single pair is appended after '?'
        assert_equal('hymn?gb=1', Utils.add_query_to_url(base_url, (('gb', '1'),)))

        # url that already carries a query string; compared as a dict so the
        # order of the parameters in the result does not matter
        combined = Utils.add_query_to_url('hymn?gb=1', (('query', '2'), ('test', '1')))
        combined_query = urlparse.urlparse(combined).query
        assert_equal({'gb': '1', 'query': '2', 'test': '1'}, dict(urlparse.parse_qsl(combined_query)))
Ejemplo n.º 4
0
def _parse_stored_classic_lyrics(hymn_number):
    """Build the lyrics list from the locally stored stored/classic/<hymn_number>.html file."""
    with open('stored/classic/{}.html'.format(hymn_number), 'r') as data:
        stored_content = data.read()
    matches = re.compile(STORED_CLASSIC_LYRICS_REGEX, re.DOTALL).findall(stored_content)[0]
    # filter out the empty items and grab first non-empty item
    content = [group for group in matches if group != ''][0]

    # create BeautifulSoup object out of html content
    external_soup = BeautifulSoup(content, "html.parser")

    lyrics = []
    # keep track of the previous chorus so we know not to add it if it appears multiple times in a row
    previous_chorus = []

    # each "div" contains a verse or a chorus
    for lyric_div in external_soup.findAll("div"):
        # class name of div is "verse" or "chorus"
        is_chorus = lyric_div.get("class")[0] == 'chorus'

        # don't need to include the verse number in the result
        stanza_content = [line for line in lyric_div.stripped_strings if not line.strip().isdigit()]

        if is_chorus:
            # same chorus as the last one seen, so skip it
            if previous_chorus == stanza_content:
                continue
            previous_chorus = stanza_content

        lyrics.append({
            VERSE_TYPE: CHORUS if is_chorus else VERSE,
            VERSE_CONTENT: stanza_content,
        })
    return lyrics


def _parse_page_lyrics(raw_lyrics):
    """Build the lyrics list from the <td> cells of the page's lyrics div."""
    lyrics = []
    for td in raw_lyrics.findAll('td'):
        # skip td if it is empty or is just a number
        text = td.text.strip()
        if not text or text.isdigit():
            continue

        # a td without a class attribute is treated as a plain verse
        td_classes = td.get('class') or ()
        if 'chorus' in td_classes:
            verse_type = CHORUS
        elif 'copyright' in td_classes or 'note' in td_classes:
            verse_type = OTHER
        else:
            verse_type = VERSE

        # every string inside the td becomes one line of the stanza
        lyrics.append({VERSE_TYPE: verse_type, VERSE_CONTENT: list(td.strings)})
    return lyrics


def get_hymn_internal(hymn_type, hymn_number, additional_args):
    """Fetch a hymn page from hymnal.net and convert it to a JSON string.

    Args:
        hymn_type: hymn type plugged into HYMN_PATH_FORMAT.
        hymn_number: hymn number plugged into HYMN_PATH_FORMAT; may carry a
            letter suffix such as '277b'.
        additional_args: query arguments. 'check_exists' is read and then
            deleted from this mapping (it is mutated in place); whatever is
            left is forwarded to hymnal.net as query parameters.

    Returns:
        A JSON string with the title, meta data and lyrics, or a
        (json_string, 400) tuple when check_exists is set and a second fetch
        of the same song returns a different title.
    """
    # whether or not we need to check if the song exists.
    # NOTE(review): this looks like a werkzeug MultiDict-style get; if so,
    # bool() of any non-empty string (including 'false') is True -- confirm.
    check_exists = additional_args.get('check_exists', type=bool)

    # every remaining query parameter is passed directly to hymnal.net
    if 'check_exists' in additional_args:
        del additional_args['check_exists']

    # create path by plugging in the hymn type and number and appending all query params
    path = HYMN_PATH_FORMAT % (hymn_type, hymn_number)
    # make http GET request to song path
    r = requests.get(Utils.add_query_to_url(GET_SONG_URL_FORMAT % path, additional_args))
    log('request sent for: %s' % path)

    # create BeautifulSoup object out of html content
    soup = BeautifulSoup(r.text, "html.parser")

    # If the song doesn't exist, hymnal.net will randomly generate a song that doesn't make sense.
    # However, it does it at run time, meaning if you request it twice, it'll have a different title.
    if check_exists:
        r2 = requests.get(GET_SONG_URL_FORMAT % path)
        soup2 = BeautifulSoup(r2.content, "html.parser")
        if soup2.title != soup.title:
            message = {Constants.PUBLIC : Constants.NOT_REAL_SONG % (hymn_type, hymn_number)}
            message['status_code'] = 400
            return (json.dumps(message), 400)

    # data to be returned as json
    json_data = {}

    # fill in title
    json_data[soup.title.name] = soup.title.string

    # extract meta data (Category, Subcategory, etc)
    meta_data = []
    # meta data contained in side bar
    sidebar = soup.find('div', {'class': 'sidebar'})
    # info is in divs with common-panel
    for div in sidebar.findChildren('div', {'class': 'common-panel'}):
        # search by CSS class
        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#searching-by-css-class
        for label in div.find_all('label', class_='col-xs-5'):
            name = label.text.replace(':', '')
            data = Utils.extract_links(label.findNextSibling(), name_key=VALUE)

            # append meta data to meta_data list if it doesn't exist already
            meta_data_object = get_meta_data_object(name, data)
            if meta_data_object not in meta_data:
                meta_data.append(meta_data_object)

    svg = extract_svg(soup)
    if svg is not None:
        meta_data.append(svg)

    json_data[META_DATA] = meta_data

    raw_lyrics = soup.find('div', {'class': 'lyrics'})

    # for the songs with "View Lyrics (external site)"
    if raw_lyrics.find('div', {'class': 'alert'}):
        # Only get the numerical number.
        # This is for when there is a new tune, such as #277b. The "b" doesn't matter when it comes to the lyrics.
        # The stripped number is also what the transliteration check below sees.
        hymn_number = re.findall(r"\d+", hymn_number)[0]
        lyrics = _parse_stored_classic_lyrics(hymn_number)
    else:
        lyrics = _parse_page_lyrics(raw_lyrics)

    if Utils.has_transliteration(hymn_type, hymn_number):
        for lyric in lyrics:
            # transliterate each character of a line and join them with spaces
            lyric[VERSE_TRANSLITERATION] = [
                ' '.join([pinyin.get(char) for char in line])
                for line in lyric[VERSE_CONTENT]
            ]

    json_data[LYRICS] = lyrics

    return json.dumps(json_data, sort_keys=True)