def get_clean_graph_generator_memory_repos():
    """Return a GraphGenerator wired to freshly reset in-memory repositories.

    Two MemoryEntityNgrams repositories (one for the artist collection, one
    for the song collection) and a MemoryEntityCounter are created, each is
    reset, and all three are handed to a new GraphGenerator.
    """
    artist_repo = MemoryEntityNgrams(
        base_entity_uri=base_entities_URI,
        type_of_entity_collection=ARTIST_COLLECTION,
    )
    song_repo = MemoryEntityNgrams(
        base_entity_uri=base_entities_URI,
        type_of_entity_collection=SONG_COLLECTION,
    )
    counter_repo = MemoryEntityCounter()

    # Reset order is kept as songs, artists, counter.
    for ngram_repo in (song_repo, artist_repo):
        ngram_repo.reset_collection()
    counter_repo.reset_count()

    return GraphGenerator(
        repo_songs=song_repo,
        repo_artist=artist_repo,
        repo_counter=counter_repo,
    )
# --- Input data read from the files/ directory ------------------------------
set_of_artist = read_necessary_index_songs(
    "files/artists_indexes_for_500000_discogs_songs.txt"
)
set_of_ids_aol = read_aol_discogs_ids("files/random_aol.txt")
set_of_musicbrainz_ids = read_mb_discogs_ids("files/random_musicbrainz.tsv")

# --- Mongo connection settings ----------------------------------------------
# Within this part of the file these are only referenced by the commented-out
# Mongo-backed repositories at the bottom of this section.
ip_mongo = "127.0.0.1"
port_mongo = 27017
path_mongo = ":".join([ip_mongo, str(port_mongo)])

# --- Dataset, parser and target graph ---------------------------------------
dataset = Dataset(title="selected_discogs_indexes")
artist_parser = DiscogsArtistParserFiltering(
    "files/discogs_artists.xml",
    dataset,
    set_of_artist,
)
graph = MeraRdflibGraph(rdflib_graph=Graph())

# --- In-memory repositories (no load file given) ----------------------------
artist_ngrams = MemoryEntityNgrams(
    base_entity_uri=base_entities_URI,
    type_of_entity_collection=ARTIST_COLLECTION,
    load_file=None,
)
song_ngrams = MemoryEntityNgrams(
    base_entity_uri=base_entities_URI,
    type_of_entity_collection=SONG_COLLECTION,
    load_file=None,
)
counter = MemoryEntityCounter()

# Alternative, currently disabled: Mongo-backed n-gram repositories.
# artist_ngrams = MongoEntityNgramsRepository(base_entity_uri=base_entities_URI,
#                                             type_of_entity_collection=ARTIST_COLLECTION,
#                                             url_root=path_mongo,
#                                             host=ip_mongo,
#                                             port=port_mongo)
# song_ngrams = MongoEntityNgramsRepository(base_entity_uri=base_entities_URI,
#                                           type_of_entity_collection=SONG_COLLECTION,
#                                           url_root=path_mongo,
#                                           host=ip_mongo,
#                                           port=port_mongo)