def mergeDictionnaries():
    '''Merge the temporary artist dictionaries into the two final ones.

    Every ``*.txt`` file matching ``savePath + "*.txt"`` is loaded with
    ``LU.loadDictionary``. Files whose name contains "All" are merged into
    the all-genres dictionary, files whose name contains "Main" into the
    main-genre dictionary; when a key appears in several files, the file
    listed later by ``glob`` wins. Both merged dictionaries are then saved
    under ``AllGenresDicName`` / ``MainGenreDicName`` in ``savePath`` and
    every file whose name contains both "-" and "temp" is deleted.
    '''
    print("----Merging generated dictionaries----")
    files = glob.glob(savePath + "*.txt")
    print("Nb files : " + str(len(files)))

    ArtistsMainDic = dict()
    ArtistsAllDic = dict()
    for file in files:
        # Classify on the file name only, so that a directory component of
        # savePath containing "All" or "Main" cannot misroute a file.
        basename = os.path.basename(file)
        filename = file.replace(".txt", "")
        dictTemp = LU.loadDictionary(filename, path="")
        # In-place update keeps the "later file wins" semantics without
        # rebuilding the whole merged dictionary on every iteration.
        if "All" in basename:
            ArtistsAllDic.update(dictTemp)
        if "Main" in basename:
            ArtistsMainDic.update(dictTemp)

    print("Artist dictionaries loaded. Merging...", end="")
    LU.saveDictionary(ArtistsAllDic, AllGenresDicName, savePath, enc="UTF-8")
    LU.saveDictionary(ArtistsMainDic, MainGenreDicName, savePath, enc="UTF-8")
    print("done.")

    print("Deleting temp files...")
    for file in files:
        # Same reasoning as above: a "-" or "temp" in the directory name
        # must not cause non-temporary dictionaries to be deleted.
        basename = os.path.basename(file)
        if "-" in basename and "temp" in basename:
            print("Deleting temp file " + file)
            os.remove(file)
    print("DONE.")
def expand(df_src, df_dst="completeWithCoordinates.csv", dic_file="locationDic"):
    """Add a coordinates column to a CSV file and write the result.

    Args:
        df_src: path of the CSV file to read.
        df_dst: path of the CSV file to write (without the index).
        dic_file: name of the location dictionary, loaded with
            ``lu.loadDictionary`` and saved back with ``lu.saveDictionary``.

    The 'Unnamed: 0' column is dropped when present. A few venue
    coordinates are written into the dictionary by hand before the lookup.
    The dictionary is saved after the coordinates column has been added.
    """
    df = pd.read_csv(df_src)
    if 'Unnamed: 0' in df.columns:
        # Keyword form: the positional ``axis`` argument of DataFrame.drop
        # is no longer accepted by recent pandas releases.
        df = df.drop(columns='Unnamed: 0')

    dic = lu.loadDictionary(dic_file)
    # Manually supplied coordinates for specific venues.
    dic["Palais-des-Congrès"] = (47.134881, 7.248004000000001)
    dic["Il_Caffè"] = (46.9382202, 7.787970900000001)
    dic["Festivalgelände"] = (47.4222173, 9.3395195)
    dic["Festivalgelände-am-Rotten"] = dic["Festivalgelände"]
    dic["Römerareal"] = (47.136266, 7.30622)

    addCoordinatesColumn(df, dic, replace=True, inplace=True)
    # NOTE(review): saved after the lookup, presumably because the lookup
    # may add entries to the dictionary - confirm against lu.getLocation.
    lu.saveDictionary(dic, dic_file)
    df.to_csv(df_dst, index=False)
def createDictionnaryFromArtists(artists):
    """Build and save the genre dictionary from the artists' Spotify genres.

    Args:
        artists: DataFrame whose rows expose a ``genres_spotify`` value
            holding the textual form of a Python literal (parsed with
            ``literal_eval``).

    Returns:
        The dictionary returned by ``AE.updateDictionnary`` after being fed
        all collected genres; it is also saved with ``LU.saveDictionary``.

    Rows whose value cannot be parsed are reported and skipped.
    """
    print("Creating dictionnary from genres downloaded from Spotify...")
    dic = AE.createDictionnary()
    genres_list = []
    for _, row in artists.iterrows():
        raw_genres = row.genres_spotify
        try:
            genres = literal_eval(raw_genres)
            if genres is not None:
                genres_list += genres
        except Exception:
            # Deliberately best-effort: one bad row must not abort the run.
            # str() is required because the failing value is often not a
            # string (e.g. NaN), and concatenating it directly would raise
            # inside this handler.
            print("Error reading " + str(raw_genres))
    dic = AE.updateDictionnary(genres_list, dic)
    LU.saveDictionary(dic, filename_genres, PATH_DIC, encoding)
    return dic
def downloadGenresWikipediaAndRA(Artists, dictionaryOfGenres, dictionaryWiki=None, dictionaryRA=None, begin=0, end=100000):
    """Fill the ``genres_wiki`` and ``genres_ra`` columns of ``Artists``.

    Args:
        Artists: DataFrame with an ``artist`` column; rows are addressed by
            index value. It is modified in place and also returned.
        dictionaryOfGenres: passed through to the ``AE`` lookup functions.
        dictionaryWiki: cache ``artist -> genres`` for Wikipedia; a new dict
            is used when None. Updated in place.
        dictionaryRA: same as ``dictionaryWiki`` for Resident Advisor.
        begin: first index value to process (inclusive).
        end: last index value to process (exclusive), capped at the number
            of rows.

    Returns:
        The ``Artists`` DataFrame.

    Both caches and the DataFrame are written to disk every 100 rows and
    once more at the end.
    """
    end = min(end, Artists.shape[0])
    print("Downloading genres from Wikipedia and Resident Advisor of "+str(end-begin)+" artist...")

    if dictionaryWiki is None:
        dictionaryWiki = {}
    if dictionaryRA is None:
        dictionaryRA = {}

    for i in range(begin, end):
        if 39700 <= i <= 39900:
            # This index range is skipped on purpose; the original code
            # only labels it "Bugs".
            continue

        row_mask = Artists.index == i
        artist = Artists.loc[row_mask, "artist"].values[0]

        # Get genres from Wikipedia (cache first, download otherwise)
        if artist in dictionaryWiki:
            genres_wiki = dictionaryWiki[artist]
        else:
            genres_wiki = AE.getGenresFromWikipedia(artist, dictionaryOfGenres)
            dictionaryWiki[artist] = genres_wiki

        # Get genres from Resident Advisor (cache first, download otherwise)
        if artist in dictionaryRA:
            genres_ra = dictionaryRA[artist]
        else:
            genres_ra = AE.getGenresFromRA(artist, dictionaryOfGenres)
            dictionaryRA[artist] = genres_ra

        # Empty results are normalised to None before being stored as text.
        if genres_wiki is None or len(genres_wiki) < 1:
            genres_wiki = None
        if genres_ra is None or len(genres_ra) < 1:
            genres_ra = None

        Artists.loc[row_mask, "genres_wiki"] = str(genres_wiki)
        Artists.loc[row_mask, "genres_ra"] = str(genres_ra)

        if i % 100 == 0:
            # Periodic checkpoint so an interrupted run loses little work.
            print(str(i))
            LU.saveDictionary(dictionaryWiki, filename_wiki_dic, PATH_DIC, encoding)
            LU.saveDictionary(dictionaryRA, filename_ra_dic, PATH_DIC, encoding)
            Artists.to_csv(PATH_ARTISTS)

    # Saving dictionaries
    Artists.to_csv(PATH_ARTISTS)
    LU.saveDictionary(dictionaryWiki, filename_wiki_dic, PATH_DIC, encoding)
    LU.saveDictionary(dictionaryRA, filename_ra_dic, PATH_DIC, encoding)
    return Artists
def downloadGenresSpotify(artistsDF, dictionarySpotify=None, begin=0, end=100000):
    """Build the artists table and fill its ``genres_spotify`` column.

    Args:
        artistsDF: DataFrame containing an ``artist`` column. It is copied,
            de-duplicated and reduced to that column; the input itself is
            not modified.
        dictionarySpotify: cache ``artist -> genres``; a new dict is used
            when None. Updated in place.
        begin: first row position to process (inclusive).
        end: last row position to process (exclusive), capped at the number
            of rows.

    Returns:
        A new DataFrame with the ``artist`` column plus the genre columns
        (all None except ``genres_spotify`` for processed rows, which holds
        the textual form of the genres, or "None" when there are none).

    The cache is saved every 100 rows and once more at the end.
    """
    Artists = artistsDF.copy()
    Artists = Artists.drop_duplicates().reset_index()[["artist"]]
    Artists.columns = ["artist"]
    for column in ("genres_spotify", "genres_ra", "genres_wiki",
                   "genres_events", "main_genres", "top3_genres", "genre"):
        Artists[column] = None

    end = min(end, Artists.shape[0])
    if dictionarySpotify is None:
        dictionarySpotify = {}

    print("Downloading genres of "+str(end-begin)+" artist from Spotify...")
    for i in range(begin, end):
        row_mask = Artists.index == i
        artist = Artists.loc[row_mask, "artist"].values[0]

        if artist in dictionarySpotify:
            genres_spotify = dictionarySpotify[artist]
        else:
            genres_spotify = AE.getGenresFromSpotify(artist)
            dictionarySpotify[artist] = genres_spotify

        # A cached or downloaded result may be None; guard before len() so
        # that "no genres" is recorded instead of raising TypeError.
        if genres_spotify is None or len(genres_spotify) < 1:
            genres_spotify = None
        Artists.loc[row_mask, "genres_spotify"] = str(genres_spotify)

        if i % 100 == 0:
            # Periodic checkpoint of the cache.
            print(i)
            LU.saveDictionary(dictionarySpotify, filename_spotify_dic, PATH_DIC, encoding)

    LU.saveDictionary(dictionarySpotify, filename_spotify_dic, PATH_DIC, encoding)
    return Artists
class PlainWeatherUtils(DataInterfaceObject):
    """Data-access helper of the Plain Weather application.

    Fetches weather XML for a location over HTTP and records successfully
    resolved locations through a ``LocationUtils`` instance.
    """

    def __init__(self):
        super(PlainWeatherUtils, self).__init__("localhost", "webuser", "webuser", "plainweather")
        # The location helper is given the same connection settings that
        # the base class exposes on this instance.
        self._locations_utils = LocationUtils(self._db_host, self._db_user, self._db_password, self._db_name)

    def initialize_database(self):
        """Initializes database for Plain Weather application."""
        self.create_database(self._db_name, True)
        with closing(self._db_engine.connect()) as connection:
            # NOTE(review): this MetaData object is not used afterwards in
            # this method; kept as in the original.
            metadata = MetaData(connection)
            self.create_tables(connection, self.construct_tables, True)
            self._locations_utils.add_comments_to_locations_table(connection, "")

    def construct_tables(self, metadata):
        """Declare the application's tables on ``metadata``."""
        self._locations_utils.construct_locations_table(metadata)

    def get_weather_for_location(self, location):
        """Return the raw XML weather response for ``location``.

        The response is also handed to the location bookkeeping so that
        successfully resolved locations are remembered.
        """
        # NOTE(review): ``location`` is appended to the URL unescaped;
        # confirm that callers pass an already URL-safe value.
        weather_url = 'http://www.google.com/ig/api?weather=' + location
        req = urllib2.Request(weather_url, None, {'user-agent':'syncstream/vimeo'})
        opener = urllib2.build_opener()
        # Close the HTTP response deterministically instead of leaving it
        # to garbage collection.
        with closing(opener.open(req)) as f:
            xml_results = f.read()
        self.__remember_weather_query(location, xml_results)
        return xml_results

    def get_recent_locations(self, max_records):
        """Return the names of up to ``max_records`` location records."""
        location_records = self._locations_utils.get_location_records(max_records)
        return [loc.name for loc in location_records]

    def __remember_weather_query(self, location, xml_results):
        """Record the city named in ``xml_results``, best effort.

        Nothing is recorded when the response contains a ``problem_cause``
        element or no ``city`` element. Any error is swallowed on purpose:
        bookkeeping must never break the weather lookup itself.
        """
        try:
            xml_dom = minidom.parseString(xml_results)
            if len(xml_dom.getElementsByTagName('problem_cause')) != 0:
                return
            cities = xml_dom.getElementsByTagName('city')
            if len(cities) == 0:
                return
            city_data = cities[0].getAttribute('data')
            self._locations_utils.update_location_record_time(city_data, datetime.datetime.now())
        # StandardError does not pick up the minidom parsing error, so the
        # handler stays broad; Exception (rather than a bare except) still
        # lets KeyboardInterrupt and SystemExit propagate.
        except Exception:
            return
def addCoordinatesColumn(df, dictionary=None, location_column_name="location", coordinates_column_name="coordinates", replace=False, inplace=False, debug=False):
    """Append a column holding the coordinates of each row's location.

    Each value of ``location_column_name`` is resolved with
    ``lu.getLocation(value, dictionary)``; the first element of its result
    is stored in the new column.

    Args:
        df: source DataFrame.
        dictionary: passed through to ``lu.getLocation``.
        location_column_name: column holding the locations to resolve.
        coordinates_column_name: name of the column to add (as last column).
        replace: when the coordinates column already exists, drop and
            recompute it instead of giving up.
        inplace: modify ``df`` itself instead of a copy.
        debug: print the row label every time it is a multiple of 128.

    Returns:
        The extended copy when ``inplace`` is False. None when ``inplace``
        is True, or when the column already exists and ``replace`` is False
        (a message is printed in that case and nothing is changed).
    """
    if coordinates_column_name in df.columns and not replace:
        print("column ",coordinates_column_name," already in dataframe.")
        return None

    # Only pay for a copy when the caller asked for one.
    target = df if inplace else df.copy()
    if coordinates_column_name in target.columns:
        # Keyword form: the positional ``axis`` argument of DataFrame.drop
        # is no longer accepted by recent pandas releases.
        target.drop(columns=coordinates_column_name, inplace=True)

    coordinates = []
    for i, location in df[location_column_name].items():
        coordinates.append(lu.getLocation(location, dictionary)[0])
        if debug and i % 128 == 0:
            print(i)

    # Object dtype so that each cell can hold a whole coordinates value.
    serie = pd.Series(coordinates, index=df.index, dtype=object)
    target.insert(len(target.columns), coordinates_column_name, serie)

    if inplace:
        return None
    return target
def __init__(self):
    """Set up the base class with fixed settings and create the location helper."""
    # NOTE(review): presumably (host, user, password, database name), in
    # the order the base class expects - confirm against DataInterfaceObject.
    base_settings = ("localhost", "webuser", "webuser", "plainweather")
    super(PlainWeatherUtils, self).__init__(*base_settings)

    # The helper is given the connection attributes found on this instance
    # after the base initialiser has run.
    self._locations_utils = LocationUtils(
        self._db_host,
        self._db_user,
        self._db_password,
        self._db_name,
    )
def save(self, force_insert=False, force_update=False, using=None):
    """Refresh the latitude/longitude, then delegate to the parent ``save``.

    The three arguments are forwarded unchanged, in the same order, to the
    parent implementation.
    """
    # Geo data must be refreshed before the record is persisted.
    LocationUtils.updateGeo(self)
    parent = super(Location, self)
    parent.save(force_insert, force_update, using)
DataFrame = None SpotifyDic = None RADic = None WikiDic = None GenresDic = None #Getting the dataframe try: Dataframe = pd.read_csv(PATH_DF,index_col=0) except: print("Error occured during read of "+PATH_DF+". Maybe can't find the file.") sys.exit(0) #Getting the Spotify dictionary try: SpotifyDic = LU.loadDictionary(filename_spotify_dic,PATH_DIC,encoding) except: print("Can't find the dictionary of Spotify genres.") SpotifyDic = None #Getting the RA dictionary try: RADic = LU.loadDictionary(filename_ra_dic,PATH_DIC,encoding) except: print("Can't find the dictionary of RA genres.") RADic = None #Getting the Wikipedia dictionary try: WikiDic = LU.loadDictionary(filename_wiki_dic,PATH_DIC,encoding) except:
def ExportGenres(ClubDataFrame, init, end):
    """Assign a genre to the events ``ClubDataFrame[init:end]``.

    For each event the ``LineUp`` value is split into artists with
    ``SplitLineup``. Each artist's genres come from the temporary artist
    dictionaries when already known, otherwise from ``AE.getGenre`` /
    ``AE.getMaxGenre`` (and are then cached). For events with at least one
    artist, the ``Genre``, ``All Genres`` and ``LineUp`` cells are
    overwritten in ``ClubDataFrame`` itself.

    Args:
        ClubDataFrame: events DataFrame with a ``LineUp`` column; modified
            in place (the ``Genre`` and ``All Genres`` columns are reset to
            None first).
        init: start of the row slice to process.
        end: end of the row slice to process.

    The artist dictionaries are saved every 50 events and at the end, under
    names that embed ``init`` and ``end``; ``mergeDictionnaries`` is called
    last.
    """
    ArtistsSet = set()
    ArtistDicoMain = dict()
    ArtistDicoAll = dict()
    filenameMain = "ArtistDicoMain-" + str(init) + "-" + str(end) + "-temp"
    filenameAll = "ArtistDicoAll-" + str(init) + "-" + str(end) + "-temp"

    # Loading artists dictionaries (resuming a previous run, if any).
    # Best effort: a missing or unreadable file simply means starting empty.
    try:
        ArtistDicoMain = LU.loadDictionary(filenameMain, path=savePath, enc="UTF-8")
        ArtistDicoAll = LU.loadDictionary(filenameAll, path=savePath, enc="UTF-8")
        ArtistsSet = set(ArtistDicoMain.keys())
    except Exception:
        print("Cannot find dictionnaries")

    # Loading genre dictionary
    # NOTE(review): the loaded dictionary is not kept; confirm whether the
    # call is only meant as an existence check.
    try:
        print("Loading genre dictionnary : " + dictionaryPath + dictionary + ".txt")
        LU.loadDictionary(dictionary, dictionaryPath, enc="UTF-8")
    except Exception:
        print("Cannot find genre dictionnary : " + dictionaryPath + dictionary + ".txt")

    ClubDataFrame["Genre"] = None
    ClubDataFrame["All Genres"] = None

    print("Retrieving genre for events[" + str(init) + "," + str(end) + "] :")
    i = 0
    for row_id, row in ClubDataFrame[init:end].iterrows():
        genres = []
        artists = SplitLineup(row["LineUp"])
        for artist in artists:
            if artist in ArtistsSet:
                mainGenre = ArtistDicoMain.get(artist)
                allGenres = ArtistDicoAll.get(artist)
            else:
                # Updating dictionaries
                ArtistsSet.add(artist)
                allGenres = AE.getGenre(artist, ReturnAllGenres=True)
                mainGenre = AE.getMaxGenre(allGenres)
                ArtistDicoMain[artist] = mainGenre
                ArtistDicoAll[artist] = allGenres
            # Adding to line-up genres
            genres.append(mainGenre)

        if len(genres) == 0:
            print(artists)
        else:
            maxGenre = AE.getMaxGenre(genres)
            # Updating dataframe. ``.at`` replaces DataFrame.set_value,
            # which no longer exists in current pandas.
            ClubDataFrame.at[row_id, "Genre"] = maxGenre
            ClubDataFrame.at[row_id, "All Genres"] = str(genres)
            ClubDataFrame.at[row_id, "LineUp"] = str(artists)

        i += 1
        if i % 10 == 0:
            print(str(i))
        if i % 50 == 0:
            # Periodic checkpoint so an interrupted run can be resumed.
            LU.saveDictionary(ArtistDicoMain, filenameMain, path=savePath, enc="UTF-8")
            LU.saveDictionary(ArtistDicoAll, filenameAll, path=savePath, enc="UTF-8")

    print("Extraction finished. Saving dictionnaries..")
    LU.saveDictionary(ArtistDicoMain, filenameMain, path=savePath, enc="UTF-8")
    LU.saveDictionary(ArtistDicoAll, filenameAll, path=savePath, enc="UTF-8")
    print("Finished.")
    mergeDictionnaries()