Esempio n. 1
0
def mergeDictionnaries():
    """Merge the per-range artist dictionaries stored under ``savePath``.

    Every ``*.txt`` file matched by ``savePath + "*.txt"`` is loaded with
    ``LU.loadDictionary``. Files whose name contains ``"All"`` are merged into
    the all-genres dictionary, files whose name contains ``"Main"`` into the
    main-genre dictionary (later files win on duplicate keys). Both merged
    dictionaries are saved under ``AllGenresDicName`` / ``MainGenreDicName``,
    then every matched file whose name contains both ``"-"`` and ``"temp"``
    is deleted.
    """
    print("----Merging generated dictionaries----")
    files = glob.glob(savePath + "*.txt")
    print("Nb files : " + str(len(files)))
    ArtistsMainDic = {}
    ArtistsAllDic = {}

    for file in files:
        # Strip only the trailing extension; str.replace would also remove a
        # ".txt" that happens to appear earlier in the path.
        filename = os.path.splitext(file)[0]
        dictTemp = LU.loadDictionary(filename, path="")

        # Classify on the file name alone so that a directory component of
        # savePath containing "All" or "Main" cannot misroute a file.
        basename = os.path.basename(file)
        if "All" in basename:
            ArtistsAllDic.update(dictTemp)
        if "Main" in basename:
            ArtistsMainDic.update(dictTemp)

    print("Artist dictionaries loaded. Merging...", end="")
    LU.saveDictionary(ArtistsAllDic, AllGenresDicName, savePath, enc="UTF-8")
    LU.saveDictionary(ArtistsMainDic, MainGenreDicName, savePath, enc="UTF-8")
    print("done.")

    print("Deleting temp files...")
    for file in files:
        # Same reasoning as above: a "temp-..." directory in the path must not
        # cause unrelated dictionaries to be deleted.
        basename = os.path.basename(file)
        if "-" in basename and "temp" in basename:
            print("Deleting temp file " + file)
            os.remove(file)
    print("DONE.")
Esempio n. 2
0
def expand(df_src, df_dst="completeWithCoordinates.csv", dic_file="locationDic"):
    """Add a coordinates column to a CSV file and write the result.

    Args:
        df_src: Path (or buffer) of the CSV file to read.
        df_dst: Destination path of the CSV file that is written.
        dic_file: Name of the location dictionary passed to
            ``lu.loadDictionary`` / ``lu.saveDictionary``.

    The location dictionary is loaded, patched with a few hard-coded entries,
    used by ``addCoordinatesColumn`` and saved back afterwards.
    """
    df = pd.read_csv(df_src)
    if 'Unnamed: 0' in df.columns:
        # Keyword axis: the positional form ``drop(label, 1)`` is rejected by
        # pandas 2.x.
        df = df.drop('Unnamed: 0', axis=1)

    dic = lu.loadDictionary(dic_file)
    # Manual overrides; values are presumably (latitude, longitude) pairs.
    dic["Palais-des-Congrès"] = (47.134881, 7.248004000000001)
    dic["Il_Caffè"] = (46.9382202, 7.787970900000001)
    dic["Festivalgelände"] = (47.4222173, 9.3395195)
    dic["Festivalgelände-am-Rotten"] = dic["Festivalgelände"]
    dic["Römerareal"] = (47.136266, 7.30622)

    addCoordinatesColumn(df, dic, replace=True, inplace=True)
    lu.saveDictionary(dic, dic_file)
    df.to_csv(df_dst, index=False)
Esempio n. 3
0
def createDictionnaryFromArtists(artists):
	"""Build and save the genre dictionary from the ``genres_spotify`` column.

	Each row's ``genres_spotify`` value is parsed with ``literal_eval``; every
	parsed value that is not ``None`` is appended to one flat list, which is
	then passed to ``AE.updateDictionnary`` together with a dictionary created
	by ``AE.createDictionnary``. The result is saved with ``LU.saveDictionary``.

	Args:
		artists: DataFrame whose rows expose a ``genres_spotify`` attribute.

	Returns:
		The dictionary returned by ``AE.updateDictionnary``.
	"""
	print("Creating dictionnary from genres downloaded from Spotify...")
	dic = AE.createDictionnary()
	genres_list = []
	for _, row in artists.iterrows():
		try:
			genres = literal_eval(row.genres_spotify)
			if genres is not None:
				genres_list += genres
		except (ValueError, SyntaxError, TypeError):
			# str(): the cell may not be a string (e.g. NaN); concatenating it
			# directly would raise inside the handler and abort the whole run.
			print("Error reading "+str(row.genres_spotify))

	dic = AE.updateDictionnary(genres_list,dic)
	LU.saveDictionary(dic,filename_genres,PATH_DIC,encoding)
	return dic
Esempio n. 4
0
def downloadGenresWikipediaAndRA(Artists,dictionaryOfGenres,dictionaryWiki=None, dictionaryRA=None, begin=0,end=100000):
	"""Fill the ``genres_wiki`` and ``genres_ra`` columns of ``Artists``.

	For every index label ``i`` in ``range(begin, end)`` the artist name is
	read from the ``artist`` column. Its genres are taken from the supplied
	cache dictionaries or, on a miss, fetched with
	``AE.getGenresFromWikipedia`` / ``AE.getGenresFromRA`` and cached.
	The string form of each result (``"None"`` when missing or empty) is
	written back into ``Artists``.

	Args:
		Artists: DataFrame with an ``artist`` column; modified in place.
		dictionaryOfGenres: Passed through to both ``AE`` lookups.
		dictionaryWiki: Optional artist -> genres cache for Wikipedia; updated
			in place. A new dict is used when ``None``.
		dictionaryRA: Optional artist -> genres cache for Resident Advisor;
			updated in place. A new dict is used when ``None``.
		begin: First index label to process.
		end: One past the last index label; clamped to the number of rows.

	Returns:
		The same ``Artists`` object that was passed in.

	Both caches and the DataFrame are written to disk whenever a processed
	``i`` is a multiple of 100, and once more at the end.
	"""
	end = min(end, Artists.shape[0])

	print("Downloading genres from Wikipedia and Resident Advisor of "+str(end-begin)+" artist...")

	if dictionaryWiki is None:
		dictionaryWiki = {}
	if dictionaryRA is None:
		dictionaryRA = {}

	for i in range(begin,end):
		# Rows 39700..39900 are skipped; the original author tagged this range "Bugs".
		if 39700 <= i <= 39900:
			continue

		# Build the row mask once per iteration instead of once per access.
		row_mask = Artists.index==i
		artist = Artists[row_mask]["artist"].values[0]

		# Get genres from Wikipedia (cache first).
		if artist in dictionaryWiki:
			genres_wiki = dictionaryWiki[artist]
		else:
			genres_wiki = AE.getGenresFromWikipedia(artist,dictionaryOfGenres)
			dictionaryWiki[artist] = genres_wiki

		# Get genres from Resident Advisor (cache first).
		if artist in dictionaryRA:
			genres_ra = dictionaryRA[artist]
		else:
			genres_ra = AE.getGenresFromRA(artist,dictionaryOfGenres)
			dictionaryRA[artist] = genres_ra

		# Normalise "nothing found" (None or empty) to None before storing.
		if genres_wiki is None or len(genres_wiki)<1:
			genres_wiki = None
		if genres_ra is None or len(genres_ra)<1:
			genres_ra = None

		Artists.loc[row_mask,"genres_wiki"] = str(genres_wiki)
		Artists.loc[row_mask,"genres_ra"] = str(genres_ra)

		# Periodic checkpoint of both caches and the DataFrame.
		if i%100==0:
			print(str(i))
			LU.saveDictionary(dictionaryWiki,filename_wiki_dic,PATH_DIC,encoding)
			LU.saveDictionary(dictionaryRA,filename_ra_dic,PATH_DIC,encoding)
			Artists.to_csv(PATH_ARTISTS)

	# Final save of the DataFrame and both caches.
	Artists.to_csv(PATH_ARTISTS)
	LU.saveDictionary(dictionaryWiki,filename_wiki_dic,PATH_DIC,encoding)
	LU.saveDictionary(dictionaryRA,filename_ra_dic,PATH_DIC,encoding)
	return Artists
Esempio n. 5
0
def downloadGenresSpotify(artistsDF,dictionarySpotify=None,begin=0,end=100000):
	"""Build the artists DataFrame and fill its ``genres_spotify`` column.

	The input is de-duplicated and reduced to its ``artist`` column, the
	remaining genre columns are created empty, and for every index label in
	``range(begin, end)`` the artist's genres are taken from
	``dictionarySpotify`` or fetched with ``AE.getGenresFromSpotify`` and
	cached. The string form of the result (``"None"`` when missing or empty)
	is stored in ``genres_spotify``.

	Args:
		artistsDF: DataFrame containing an ``artist`` column; not modified.
		dictionarySpotify: Optional artist -> genres cache; updated in place.
			A new dict is used when ``None``.
		begin: First index label to process.
		end: One past the last index label; clamped to the number of rows.

	Returns:
		The newly built DataFrame.

	The cache is saved whenever ``i`` is a multiple of 100 and once at the end.
	"""
	# drop_duplicates() returns a new frame, so the input is left untouched.
	Artists = artistsDF.drop_duplicates().reset_index()[["artist"]]
	Artists.columns=["artist"]
	for column in ("genres_spotify","genres_ra","genres_wiki","genres_events","main_genres","top3_genres","genre"):
		Artists[column]=None

	end = min(end, Artists.shape[0])

	if dictionarySpotify is None:
		dictionarySpotify = {}

	print("Downloading genres of "+str(end-begin)+" artist from Spotify...")
	for i in range(begin,end):
		# Build the row mask once per iteration instead of once per access.
		row_mask = Artists.index==i
		artist = Artists[row_mask]["artist"].values[0]

		if artist in dictionarySpotify:
			genres_spotify = dictionarySpotify[artist]
		else:
			genres_spotify = AE.getGenresFromSpotify(artist)
			dictionarySpotify[artist] = genres_spotify

		# Guard against None before len(): a cached or fetched None would
		# otherwise raise TypeError (the Wikipedia/RA variant has this guard).
		if genres_spotify is None or len(genres_spotify)<1:
			genres_spotify = None
		Artists.loc[row_mask,"genres_spotify"] = str(genres_spotify)

		# Periodic checkpoint of the cache.
		if i%100==0:
			print(i)
			LU.saveDictionary(dictionarySpotify,filename_spotify_dic,PATH_DIC,encoding)

	LU.saveDictionary(dictionarySpotify,filename_spotify_dic,PATH_DIC,encoding)
	return Artists
class PlainWeatherUtils(DataInterfaceObject):
    """Data-access helper for the Plain Weather application.

    Fetches weather XML for a location over HTTP and records successfully
    resolved city names through a ``LocationUtils`` instance.
    """

    def __init__(self):
        super(PlainWeatherUtils, self).__init__("localhost", "webuser", "webuser", "plainweather")
        # The location helper is given the same connection settings
        # (presumably stored on self by the base initializer).
        self._locations_utils = LocationUtils(self._db_host, self._db_user, self._db_password, self._db_name)

    def initialize_database(self):
        """Initializes database for Plain Weather application."""
        self.create_database(self._db_name, True)
        with closing(self._db_engine.connect()) as connection:
            # NOTE(review): this MetaData object is not used below; kept in
            # case constructing it has a side effect - TODO confirm and drop.
            metadata = MetaData(connection)
            self.create_tables(connection, self.construct_tables, True)
            self._locations_utils.add_comments_to_locations_table(connection, "")

    def construct_tables(self, metadata):
        """Register the locations table on ``metadata``."""
        self._locations_utils.construct_locations_table(metadata)

    def get_weather_for_location(self, location):
        """Fetch the raw weather XML for ``location`` and remember the query.

        Args:
            location: Location string appended verbatim to the request URL.

        Returns:
            The response body as returned by ``read()``.
        """
        # NOTE(review): location is concatenated without URL-escaping.
        weather_url = 'http://www.google.com/ig/api?weather=' + location
        req = urllib2.Request(weather_url, None, {'user-agent':'syncstream/vimeo'})
        opener = urllib2.build_opener()
        # closing() releases the HTTP response even if read() raises.
        with closing(opener.open(req)) as f:
            xml_results = f.read()
        self.__remember_weather_query(location, xml_results)
        return xml_results

    def get_recent_locations(self, max_records):
        """Return the ``name`` of each record from ``get_location_records(max_records)``."""
        location_records = self._locations_utils.get_location_records(max_records)
        return [loc.name for loc in location_records]

    def __remember_weather_query(self, location, xml_results):
        """Record the queried city when the XML response reports no problem.

        Best effort: any parsing or storage error is swallowed so that a
        failure here never affects the weather lookup itself.
        """
        try:
            xml_dom = minidom.parseString(xml_results)
            # A <problem_cause> element marks a failed lookup; nothing to store.
            if len(xml_dom.getElementsByTagName('problem_cause')) != 0:
                return
            cities = xml_dom.getElementsByTagName('city')
            if len(cities) > 0:
                city_data = cities[0].getAttribute('data')
                self._locations_utils.update_location_record_time(city_data, datetime.datetime.now())
        # StandardError does not pick up minidom parsing error, so catch
        # Exception; unlike a bare except this lets KeyboardInterrupt and
        # SystemExit propagate.
        except Exception:
            return
Esempio n. 7
0
def addCoordinatesColumn(df, dictionary=None, location_column_name="location", coordinates_column_name = "coordinates",replace=False, inplace = False, debug=False):
    """Append a column holding the looked-up coordinates of each row's location.

    Args:
        df: Source DataFrame.
        dictionary: Passed as second argument to ``lu.getLocation``.
        location_column_name: Column whose value is looked up for each row.
        coordinates_column_name: Name of the column to append.
        replace: When the coordinates column already exists, drop and rebuild
            it if True; otherwise print a message and return ``None``.
        inplace: Modify ``df`` itself when True; otherwise work on a copy.
        debug: Print the index label whenever it is a multiple of 128
            (requires integer labels).

    Returns:
        The modified copy when ``inplace`` is False, otherwise ``None``.
        ``None`` is also returned when the column exists and ``replace`` is
        False.
    """
    if coordinates_column_name in df.columns and not replace:
        print("column ",coordinates_column_name," already in dataframe.")
        return

    # Only pay for a copy when the caller asked for a new frame.
    target = df if inplace else df.copy()
    if coordinates_column_name in target.columns:
        # Keyword axis: the positional form ``drop(label, 1)`` is rejected by
        # pandas 2.x.
        target.drop(coordinates_column_name, axis=1, inplace=True)

    # Object dtype so each cell can hold whatever lu.getLocation(...)[0]
    # returns (the original passed the ``np.dtype`` class itself, presumably
    # meaning this).
    serie = pd.Series(data=np.nan, index=df.index, dtype=object)
    for i in df.index:
        serie[i] = lu.getLocation(df.loc[i,location_column_name], dictionary)[0]
        if debug and i%128 == 0:
            print(i)

    target.insert(len(target.columns), coordinates_column_name, serie)
    if not inplace:
        return target
 def __init__(self):
     """Initialise the base object with fixed settings and build the location helper."""
     base = super(PlainWeatherUtils, self)
     base.__init__("localhost", "webuser", "webuser", "plainweather")
     # The helper receives the connection settings read back from this
     # instance (presumably stored by the base initializer).
     self._locations_utils = LocationUtils(
         self._db_host,
         self._db_user,
         self._db_password,
         self._db_name,
     )
Esempio n. 9
0
    def save(self, force_insert=False, force_update=False, using=None):
        """Refresh this object's geo data, then delegate to the parent ``save``.

        ``LocationUtils.updateGeo`` is called on the instance first; the three
        arguments are then forwarded positionally and unchanged.
        """
        # Geo update must run before the parent save is invoked.
        LocationUtils.updateGeo(self)
        parent = super(Location, self)
        parent.save(force_insert, force_update, using)
Esempio n. 10
0
	DataFrame = None
	SpotifyDic = None
	RADic = None
	WikiDic = None
	GenresDic = None
	
	#Getting the dataframe
	try:
		Dataframe = pd.read_csv(PATH_DF,index_col=0)
	except:
		print("Error occured during read of "+PATH_DF+". Maybe can't find the file.")
		sys.exit(0)
	
	#Getting the Spotify dictionary
	try:
		SpotifyDic = LU.loadDictionary(filename_spotify_dic,PATH_DIC,encoding)
	except:
		print("Can't find the dictionary of Spotify genres.")
		SpotifyDic = None
		
	#Getting the RA dictionary
	try:
		RADic = LU.loadDictionary(filename_ra_dic,PATH_DIC,encoding)
	except:
		print("Can't find the dictionary of RA genres.")
		RADic = None
		
	#Getting the Wikipedia dictionary
	try:
		WikiDic = LU.loadDictionary(filename_wiki_dic,PATH_DIC,encoding)
	except:
Esempio n. 11
0
def ExportGenres(ClubDataFrame, init, end):
    """Annotate the events ``init:end`` of ``ClubDataFrame`` with genres.

    For each row of ``ClubDataFrame[init:end]`` the ``LineUp`` value is split
    with ``SplitLineup``. Each artist's main genre comes from the temporary
    per-range dictionaries or, on a miss, from ``AE.getGenre`` /
    ``AE.getMaxGenre`` (and is then cached). The row's ``Genre``,
    ``All Genres`` and ``LineUp`` cells are overwritten with the result.

    Args:
        ClubDataFrame: DataFrame with a ``LineUp`` column; modified in place.
            Its ``Genre`` and ``All Genres`` columns are reset to ``None``
            for all rows before processing.
        init: Start of the row slice.
        end: End (exclusive) of the row slice.

    The two temporary dictionaries are saved every 50 processed rows and at
    the end, after which ``mergeDictionnaries`` is called.
    """
    ArtistsSet = set()
    ArtistDicoMain = dict()
    ArtistDicoAll = dict()

    filenameMain = "ArtistDicoMain-" + str(init) + "-" + str(end) + "-temp"
    filenameAll = "ArtistDicoAll-" + str(init) + "-" + str(end) + "-temp"

    # Loading artists dictionaries (resume support); a failure just means
    # starting from empty caches.
    try:
        ArtistDicoMain = LU.loadDictionary(filenameMain,
                                           path=savePath,
                                           enc="UTF-8")
        ArtistDicoAll = LU.loadDictionary(filenameAll,
                                          path=savePath,
                                          enc="UTF-8")
        ArtistsSet = set(ArtistDicoMain.keys())
    except Exception:
        print("Cannot find dictionnaries")

    # Loading genre dictionary.
    # NOTE(review): the returned value is discarded; the call is kept in case
    # it has a side effect inside LU - TODO confirm.
    try:
        print("Loading genre dictionnary : " + dictionaryPath + dictionary +
              ".txt")
        LU.loadDictionary(dictionary, dictionaryPath, enc="UTF-8")
    except Exception:
        print("Cannot find genre dictionnary : " + dictionaryPath +
              dictionary + ".txt")

    def _saveArtistDictionaries():
        # Persist both artist caches under their temporary file names.
        LU.saveDictionary(ArtistDicoMain,
                          filenameMain,
                          path=savePath,
                          enc="UTF-8")
        LU.saveDictionary(ArtistDicoAll,
                          filenameAll,
                          path=savePath,
                          enc="UTF-8")

    ClubDataFrame["Genre"] = None
    ClubDataFrame["All Genres"] = None

    print("Retrieving genre for events[" + str(init) + "," + str(end) + "] :")

    rows = ClubDataFrame[init:end].iterrows()
    for processed, (row_id, row) in enumerate(rows, start=1):
        artists = SplitLineup(row["LineUp"])
        genres = []

        for artist in artists:
            if artist in ArtistsSet:
                mainGenre = ArtistDicoMain.get(artist)
            else:
                # Updating dictionaries with the newly fetched artist.
                ArtistsSet.add(artist)
                allGenres = AE.getGenre(artist, ReturnAllGenres=True)
                mainGenre = AE.getMaxGenre(allGenres)
                ArtistDicoMain[artist] = mainGenre
                ArtistDicoAll[artist] = allGenres

            # Adding to LineUp genres
            genres.append(mainGenre)

        if not genres:
            print(artists)
        else:
            maxGenre = AE.getMaxGenre(genres)
            # Updating dataframe. DataFrame.set_value was removed in
            # pandas 1.0; .at is its label-based scalar replacement.
            ClubDataFrame.at[row_id, "Genre"] = maxGenre
            ClubDataFrame.at[row_id, "All Genres"] = str(genres)
            ClubDataFrame.at[row_id, "LineUp"] = str(artists)

        if processed % 10 == 0:
            print(str(processed))
        if processed % 50 == 0:
            _saveArtistDictionaries()

    print("Extraction finished. Saving dictionnaries..")
    _saveArtistDictionaries()
    print("Finished.")
    mergeDictionnaries()