def test_many_real_songs(self):
        # Smoke test: iterate the songs parsed from discogs_releases.xml.
        # Nothing is asserted; the test passes as long as parsing does not raise.
        releases_path = rel_path_to_file("../../files/discogs_releases.xml", __file__)
        song_parser = DiscogsSongParser(file_path=releases_path,
                                        dataset=Dataset(title="TestDataset"))

        # Numbering starts at 2: it mirrors a counter initialised to 1 that is
        # incremented before being checked, so the loop stops on the same song.
        for seen, _song in enumerate(song_parser.parse_songs(), 2):
            if seen % 50000 == 0:  # 50.000
                break
    def test_entity_detection(self):
        parser = DiscogsSongParser(file_path=rel_path_to_file("../../files/releases_piece_big.xml", __file__),
                                   dataset=Dataset(title="TestDataset"))

        counter_songs = 0
        counter_writers = 0
        for a_song in parser.parse_songs():
            counter_songs += 1
            for a_coll in a_song.collaborations:
                if a_coll.role == ROLE_WRITER:
                    counter_writers += 1
            for an_alt in a_song.alternative_titles:
                print a_song.canonical, an_alt

        self.assertEqual(281, counter_songs, msg="Expected 281 songs, but parsed " + str(counter_songs))
        self.assertEqual(427, counter_writers, msg="Expected 427 songs with writter, but parsed " + str(counter_writers))
 def _process_songs_node(songs_node, artists, collaborations, album, genres, country, release_date, release_id):
     """Yield a Song for every song node that has a usable title.

     Each child of ``songs_node`` is scanned for its title, its track
     position (combined with ``release_id`` into a Discogs id) and its
     per-track collaborations. Collaborations whose role is ROLE_FEATURER
     contribute an extra artist to that song only.

     :param songs_node: node whose children are the individual song nodes
     :param artists: release-level artists; copied for each song and never
         modified, so it must be iterable
     :param collaborations: release-level collaborations, passed unchanged
         to every Song
     :param album: not used by this method
     :param genres: not used by this method
     :param country: not used by this method
     :param release_date: not used by this method
     :param release_id: release identifier used to build each song's id
     :return: generator of Song objects
     """
     for song_node in list(songs_node):
         title = None
         discogs_id = None
         # Work on a private copy: appending featured artists to the shared
         # ``artists`` list would leak them into the caller's list and into
         # every other song built from it.
         song_artists = list(artists)
         # NOTE(review): non-featurer collaborations are gathered here but are
         # not handed to Song below -- confirm whether they should be merged
         # into ``collaborations``.
         extra_collaborations = []
         for elem in list(song_node):
             if elem.tag == SONG_TITLE:
                 title = normalize_discogs_name(elem.text)
             elif elem.tag == TRACK_POSITION:
                 discogs_id = DiscogsSongParser.build_discogs_id(release_id, elem.text)
             elif elem.tag == COLLABORATIONS:
                 for a_coll in DiscogsSongParserFilteringNoNamevars._process_collaborations_node(elem):
                     # a_coll is indexed as (role, payload).
                     if a_coll[0] == ROLE_FEATURER:
                         song_artists.append(a_coll[1])
                     else:
                         extra_collaborations.append(a_coll[1])
         # Song nodes whose title is one of the EMPTY_CONTENT values are skipped.
         if title not in EMPTY_CONTENT:
             yield Song(canonical=title,
                        discogs_id=discogs_id,
                        artists=song_artists,
                        collaborations=collaborations)