예제 #1
0
 def __init__(self):
     """Open the ``Billboard`` MongoDB database and set up the AlchemyAPI client.

     If creating the database handle raises, the error is printed and the
     process exits with a non-zero status so callers/shells can detect the
     failure.
     """
     try:
         self.db = MongoClient().Billboard
     except Exception as e:
         # print(...) with a single argument works on both Python 2 and 3.
         print(e)
         sys.exit(1)
     self.alchemyObj = AlchemyAPI()  # sentiment analysis API
     # The API key is read from a file relative to the working directory.
     self.alchemyObj.loadAPIKey('AlchemyAPIKey.txt')
예제 #2
0
def get_alchemy_subjects_remote(url):
    """Return subjects extracted by AlchemyAPI's ranked-concepts call for ``url``.

    The API key is read from the ``ITagHelperSettingsSchema`` registry
    settings.  An empty list is returned when no key is configured or when
    the remote call / result parsing fails (best effort).

    :param url: address of the page to analyse.
    :returns: list of subjects as produced by ``_list_alchemy_results``.
    """
    # Must exist before the try block: every exit path returns it.
    results = []
    registry = getUtility(IRegistry)
    settings = registry.forInterface(ITagHelperSettingsSchema)
    api_key = settings.alchemy_api_key
    if not api_key:
        return results

    alchemyObj = AlchemyAPI()
    alchemyObj.setAPIKey(api_key)
    try:
        result = alchemyObj.URLGetRankedConcepts(url)
        results += _list_alchemy_results(result)
    except Exception:
        # Best effort: a failing remote call yields whatever was collected
        # so far, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return results
    return results
예제 #3
0
def get_alchemy_subjects_remote(url):
    """Fetch AlchemyAPI ranked concepts for the document at ``url``.

    Reads ``alchemy_api_key`` from the ``ITagHelperSettingsSchema`` registry
    record.  Without a key, or if the API call or result conversion raises,
    an empty list is returned instead of propagating the error.

    :param url: location of the document to analyse.
    :returns: list built by ``_list_alchemy_results`` (possibly empty).
    """
    # Initialise up front so the fallback returns below never hit an
    # unbound local.
    results = []
    registry = getUtility(IRegistry)
    settings = registry.forInterface(ITagHelperSettingsSchema)
    api_key = settings.alchemy_api_key
    if not api_key:
        return results

    alchemyObj = AlchemyAPI()
    alchemyObj.setAPIKey(api_key)
    try:
        result = alchemyObj.URLGetRankedConcepts(url)
        results += _list_alchemy_results(result)
    except Exception:
        # Deliberately best effort; only ordinary exceptions are absorbed.
        return results
    return results
예제 #4
0
def get_alchemy_subjects(text):
    """Return subjects extracted by AlchemyAPI's ranked-concepts call for ``text``.

    The API key and the ``alchemy_relevance`` setting are read from the
    ``ITagHelperSettingsSchema`` registry settings; the relevance value is
    forwarded to ``_list_alchemy_results`` (presumably a minimum relevance
    threshold -- confirm against that helper).

    :param text: the text to analyse.
    :returns: list of subjects; empty when no API key is configured or the
        API call / result parsing fails (best effort).
    """
    registry = getUtility(IRegistry)
    settings = registry.forInterface(ITagHelperSettingsSchema)
    api_key = settings.alchemy_api_key
    relevance = settings.alchemy_relevance
    results = []
    if not api_key:
        return results

    alchemyObj = AlchemyAPI()
    alchemyObj.setAPIKey(api_key)
    try:
        result = alchemyObj.TextGetRankedConcepts(text)
        results += _list_alchemy_results(result, relevance)
    except Exception:
        # Best effort: swallow ordinary failures only, so Ctrl-C and
        # SystemExit still propagate.
        return results
    return results
예제 #5
0
def get_alchemy_subjects(text):
    """Extract AlchemyAPI ranked concepts from ``text``.

    ``alchemy_api_key`` and ``alchemy_relevance`` come from the
    ``ITagHelperSettingsSchema`` registry record.  The relevance setting is
    passed straight to ``_list_alchemy_results`` (its exact meaning is
    defined by that helper -- TODO confirm).

    :param text: the text to analyse.
    :returns: list built by ``_list_alchemy_results``; empty if no API key
        is set or if the API call or result conversion raises.
    """
    registry = getUtility(IRegistry)
    settings = registry.forInterface(ITagHelperSettingsSchema)
    api_key = settings.alchemy_api_key
    relevance = settings.alchemy_relevance
    results = []
    if not api_key:
        return results

    alchemyObj = AlchemyAPI()
    alchemyObj.setAPIKey(api_key)
    try:
        result = alchemyObj.TextGetRankedConcepts(text)
        results += _list_alchemy_results(result, relevance)
    except Exception:
        # Intentional best effort; narrowed from a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not absorbed.
        return results
    return results
    # command: TextExtraction
    min_content_length = config['processing']['commands'] \
        ['text_extraction']['min_content_length']
    commands.append(TextExtractionCommand(te_client, min_content_length))

    # command: Parsing
    commands.append(ParsingCommand())

    # command: TermExtraction
    lemmatizer = Lemmatizer()
    min_term_count = config['processing']['commands'] \
        ['term_extraction']['min_term_count']
    commands.append(TermExtractionCommand(lemmatizer, min_term_count))

    # command: AlchemyEntityExtraction
    alchemy = AlchemyAPI()
    alchemy.setAPIKey(config['alchemyapi']['key'])
    alchemy_cache_collection = mcm.connection[cache_db][config['alchemyapi']['cache']['namespace']]
    alchemy_cache = MongoCache(alchemy_cache_collection)
    min_entity_count = config['processing']['commands'] \
        ['entity_extraction']['min_entity_count']
    commands.append(AlchemyEntityExtractionCommand(alchemy, alchemy_cache, min_entity_count))

    # command: Building
    commands.append(BuildingCommand())

    # start service clients
    te_client.connect()

    # set up the server
    handler = ProcessingHandler(commands)
예제 #7
0
class DBController(object):
    """Data-access helper around the ``Billboard`` MongoDB database.

    The methods read and write the collections ``song``, ``top50``,
    ``sales``, ``youtube``, ``IMVDB``, ``MTV``, ``radio``, ``streaming``,
    ``twitter``, ``feature`` and ``sentiment``.  An AlchemyAPI client is kept
    on the instance for optional sentiment scoring.
    """

    def __init__(self):
        """Open the ``Billboard`` database and set up the AlchemyAPI client.

        If creating the database handle raises, the error is printed and the
        process exits with a non-zero status.
        """
        try:
            self.db = MongoClient().Billboard
        except Exception as e:
            # print(...) with one argument is valid on Python 2 and 3.
            print(e)
            sys.exit(1)
        self.alchemyObj = AlchemyAPI()  # sentiment analysis API
        self.alchemyObj.loadAPIKey('AlchemyAPIKey.txt')

    def insertSong(self, title, artist):
        """Insert a song unless the same title/artist pair is already stored.

        Returns the id given to the new song, or None when the song already
        existed.  The new id is the current document count of ``song``.
        """
        if self.getSongId(title, artist) is not None:
            return None
        newId = self.db.song.count()
        self.db.song.insert({'id': newId, 'title': title, 'artist': artist})
        return newId

    def insertSongFromSongList(self, songList):
        """Insert every ``(title, artist)`` pair of ``songList``."""
        for title, artist in songList:
            self.insertSong(title, artist)

    def getSongId(self, title, artist=None):
        """Return the id of the song matching ``title`` (and ``artist`` if given).

        Returns None when no such song is stored.
        """
        query = {'title': title}
        if artist is not None:
            query['artist'] = artist
        song = self.db.song.find_one(query)
        return None if song is None else song['id']

    def getSongById(self, songId):
        """Return the song document with id ``songId``, or None."""
        return self.db.song.find_one({'id': songId})

    def getAllSongs(self):
        """Return all song documents as a list."""
        return list(self.db.song.find())

    def getSongByWeek(self, week):
        """Return the song documents of the top-50 chart stored for ``week``.

        Returns None when no chart is stored for that week.  Entries whose
        id has no song document appear as None in the list.
        """
        top50Dict = self.db.top50.find_one({'week': week})
        if top50Dict is None:
            return None
        return [self.db.song.find_one({'id': songId})
                for songId in top50Dict['rank']]

    def checkTop50ExistInDB(self, date):
        """Return True when a complete (50 entry) top-50 chart exists.

        ``date`` is normalised with ``dateToSaturday`` before the lookup.
        """
        chart = self.db.top50.find_one({'week': dateToSaturday(date)})
        return chart is not None and len(chart['rank']) == 50

    def checkSalesRankExistInDB(self, date):
        """Return True when a complete (40 entry) sales chart exists.

        ``date`` is normalised with ``dateToSaturday`` before the lookup.
        """
        chart = self.db.sales.find_one({'week': dateToSaturday(date)})
        return chart is not None and len(chart['rank']) == 40

    def insertCommentToDB(self, songId, commentsList):
        """Replace the stored YouTube comments of ``songId``."""
        self.db.youtube.remove({'id': songId})
        self.db.youtube.insert({'id': songId, 'comment': commentsList})

    def insertSalesChartToDB(self, week, chart):
        """Replace the sales chart of the week containing ``week``.

        ``chart`` is an iterable of 6-tuples whose first two items are title
        and artist.  Songs are matched by title+artist, then by title only;
        unmatched songs are stored as None because new songs are only ever
        inserted from the Billboard (top-50) chart.
        """
        week = dateToSaturday(week)
        self.db.sales.remove({'week': week})
        songList = []
        for title, artist, _, _, _, _ in chart:
            songId = self.getSongId(title, artist)
            if songId is None:
                # Fall back to a title-only match; may still be None.
                songId = self.getSongId(title)
            songList.append(songId)
        self.db.sales.insert({'week': week, 'rank': songList})

    def insertTop50ChartToDB(self, week, chart):
        """Replace the top-50 chart of the week containing ``week``.

        ``week`` is a datetime object whose non-date parts must be zero.
        ``chart`` is an iterable of 3-tuples ``(_, title, artist)``; songs
        not yet known are inserted into ``song``.
        """
        week = dateToSaturday(week)
        self.db.top50.remove({'week': week})
        songList = []
        for _, title, artist in chart:
            songId = self.getSongId(title, artist)
            if songId is None:
                songId = self.insertSong(title, artist)
            songList.append(songId)
        self.db.top50.insert({'week': week, 'rank': songList})

    def insertIMVDBDataToDB(self, songId, viewStatDataList, socialInteractionDataList, detailStatDataDict):
        """Store IMVDB statistics for ``songId``, merging with existing data.

        When a document already exists, each of its three series is merged
        with the new data through ``mergeDataList``; otherwise a fresh
        document is created.  The document is then replaced in ``IMVDB``.
        """
        dataDict = self.db.IMVDB.find_one({'id': songId})
        if dataDict is not None:
            dataDict['viewCount'] = self.mergeDataList(dataDict['viewCount'], viewStatDataList)
            dataDict['socialInteraction'] = self.mergeDataList(dataDict['socialInteraction'], socialInteractionDataList)
            dataDict['detailData'] = self.mergeDataList(dataDict['detailData'], [detailStatDataDict])
        else:
            dataDict = {'id': songId, 'viewCount': viewStatDataList, 'socialInteraction': socialInteractionDataList, 'detailData': [detailStatDataDict]}
        self.db.IMVDB.remove({'id': songId})
        self.db.IMVDB.insert(dataDict)

    def mergeDataList(self, oldList, newList):
        """Merge two lists of dicts keyed by ``'week'``; new data wins.

        Both lists are sorted by week (the inputs are not mutated).  Old
        entries dated before the earliest new week are kept, everything from
        that week onwards is taken from ``newList``.

        :returns: a new list sorted by week.  With an empty ``newList`` the
            sorted old entries are returned unchanged.
        """
        def weekOf(item):
            return item['week']

        # sorted() returns a new list; its result has to be used.
        oldSorted = sorted(oldList, key=weekOf)
        if not newList:
            return oldSorted
        newSorted = sorted(newList, key=weekOf)
        firstNewWeek = newSorted[0]['week']
        # When no old entry reaches firstNewWeek, all old entries are kept.
        kept = [item for item in oldSorted if item['week'] < firstNewWeek]
        return kept + newSorted

    def insertMTVReviewToDB(self, songId, review):
        """Replace the stored MTV review data of ``songId``."""
        self.db.MTV.remove({'id': songId})
        self.db.MTV.insert({'id': songId, 'review': review})

    def getSongIdListByWeek(self, week):
        """Return the ranked song-id list of the top-50 chart for ``week``.

        Raises TypeError when no chart is stored for that week (the lookup
        result is None).
        """
        return self.db.top50.find_one({'week': week})['rank']

    def getTop50Rank(self, week, songId):
        """Return the 1-based top-50 rank of ``songId`` in ``week``, or None."""
        try:
            index = self.db.top50.find_one({'week': week})['rank'].index(songId)
            return index + 1
        except Exception:
            # Missing chart, missing 'rank' or song not in the chart.
            return None

    def getSalesRank(self, week, songId):
        """Return the 1-based sales rank of ``songId`` in ``week``, or None."""
        try:
            index = self.db.sales.find_one({'week': week})['rank'].index(songId)
            return index + 1
        except Exception:
            # Missing chart, missing 'rank' or song not in the chart.
            return None

    def _rankInWeeklySongs(self, collection, week, songId):
        """Return ``int(rank)`` of ``songId`` in a weekly ``songs`` document, or None."""
        weekDict = collection.find_one({'week': week})
        if weekDict is not None:
            for songDict in weekDict['songs']:
                if songDict['id'] == songId:
                    return int(songDict['rank'])
        return None

    def getRadioRank(self, week, songId):
        """Return the radio rank of ``songId`` in ``week`` as int, or None."""
        return self._rankInWeeklySongs(self.db.radio, week, songId)

    def getStreamingRank(self, week, songId):
        """Return the streaming rank of ``songId`` in ``week`` as int, or None."""
        return self._rankInWeeklySongs(self.db.streaming, week, songId)

    @staticmethod
    def _log10CountForWeek(entries, week):
        """Return log10 of the ``count`` of the first entry for ``week``, or None."""
        for entry in entries:
            if week == entry['week']:
                return log10(entry['count'])
        return None

    def getIMVDBData(self, week, songId):
        """Return ``(viewIndex, socialInteractionIndex)`` for a song and week.

        Each index is log10 of the matching ``count``; a value is None when
        the song or the week has no data.
        """
        # NOTE(review): the original author's comment suggested substituting
        # an average value for missing data; None is what is returned.
        viewIndex, socialInteractionIndex = None, None
        songDict = self.db.IMVDB.find_one({'id': songId})
        if songDict is not None:
            viewIndex = self._log10CountForWeek(songDict['viewCount'], week)
            socialInteractionIndex = self._log10CountForWeek(songDict['socialInteraction'], week)
        return viewIndex, socialInteractionIndex

    def getMTVReviewData(self, week, songId, useAlchemyAPI=False):
        """Return ``(count, score)`` for MTV reviews dated up to ``week``.

        Reviews are cumulative (``review['week'] <= week``).  Returns
        ``(None, None)`` when no review qualifies.  The score comes from
        AlchemyAPI over the concatenated review contents when
        ``useAlchemyAPI`` is true, otherwise from the stored LIWC sentiment.
        """
        MTVDict = self.db.MTV.find_one({'id': songId})
        count, parts = 0, []
        if MTVDict is not None and 'review' in MTVDict:
            for review in MTVDict['review']:
                if review['week'] <= week:
                    count += 1
                    if useAlchemyAPI:
                        parts.append(review['content'])
        if count == 0:
            return None, None
        if useAlchemyAPI:
            score = self.getSentimentScoreFromAPI(''.join(parts))
        else:
            score = self.getSentimentScoreFromLIWC(week, songId, 'MTV')
        return count, score

    def getYoutubeData(self, week, songId, useAlchemyAPI=False):
        """Return ``(count, score)`` for YouTube comments made in ``week``.

        Returns ``(None, None)`` when there is no comment for that week.
        The score comes from AlchemyAPI over the concatenated comments when
        ``useAlchemyAPI`` is true, otherwise from the stored LIWC sentiment.
        """
        youtubeDict = self.db.youtube.find_one({'id': songId})
        count, parts = 0, []
        if youtubeDict is not None:
            for comment in youtubeDict['comment']:
                if comment['week'] == week:
                    count += 1
                    if useAlchemyAPI:
                        parts.append(comment['comment'])
        if count == 0:
            return None, None
        if useAlchemyAPI:
            score = self.getSentimentScoreFromAPI(''.join(parts))
        else:
            score = self.getSentimentScoreFromLIWC(week, songId, 'youtube')
        return count, score

    def getTwitterData(self, week, songId, useAlchemyAPI=False):
        """Return ``(count, score)`` for tweets about ``songId`` in ``week``.

        Returns ``(None, None)`` when no tweets are stored.  The score comes
        from AlchemyAPI over the concatenated tweet texts when
        ``useAlchemyAPI`` is true, otherwise from the stored LIWC sentiment.
        """
        twitterDict = self.db.twitter.find_one({'week': week})
        count, parts = 0, []
        if twitterDict is not None:
            for songDict in twitterDict['songs']:
                if songDict['id'] == songId:
                    count = len(songDict['tweets'])
                    if useAlchemyAPI:
                        parts.extend(tweet['text'] for tweet in songDict['tweets'])
        if count == 0:
            return None, None
        if useAlchemyAPI:
            score = self.getSentimentScoreFromAPI(''.join(parts))
        else:
            score = self.getSentimentScoreFromLIWC(week, songId, 'twitter')
        return count, score

    def insertFeatureToDB(self, featureDict):
        """Replace the feature document identified by ``week`` and ``id``."""
        self.db.feature.remove({'week': featureDict['week'], 'id': featureDict['id']})
        self.db.feature.insert(featureDict)

    def getFeature(self, week, songId):
        """Return the feature document for ``week`` and ``songId``, or None.

        Feature documents are keyed by ``'id'`` (see ``insertFeatureToDB``),
        so the lookup uses that key.
        """
        return self.db.feature.find_one({'week': week, 'id': songId})

    def getSentimentScoreFromAPI(self, text):
        """Return the AlchemyAPI sentiment score of ``text`` scaled by 100.

        Returns 0 for missing/empty text and when the API call or the
        parsing of its XML ``<score>`` element fails (the error is printed).
        """
        if not text:
            return 0
        try:
            result = self.alchemyObj.TextGetTextSentiment(text)
            score = float(result.split('<score>')[1].split('</score>')[0])
            return score * 100  # normalize
        except Exception as e:
            print(e)
            return 0

    def getSentimentScoreFromLIWC(self, week, songId, scoreType):
        """Return positive minus negative emotion score from ``sentiment``.

        :param scoreType: one of ``'youtube'``, ``'MTV'`` or ``'twitter'``;
            selects the ``<scoreType>_posemo`` / ``<scoreType>_negemo`` fields.
        :returns: 0 when no sentiment document exists; stored None values
            count as 0.
        :raises Exception: for any other ``scoreType`` (only checked when a
            sentiment document exists).
        """
        sentimentDict = self.db.sentiment.find_one({'week': week, 'song_id': songId})
        if sentimentDict is None:
            return 0
        if scoreType not in ('youtube', 'MTV', 'twitter'):
            raise Exception('invalid score type')
        posScore = sentimentDict[scoreType + '_posemo']
        negScore = sentimentDict[scoreType + '_negemo']
        if posScore is None:
            posScore = 0
        if negScore is None:
            negScore = 0
        return posScore - negScore

    def isFeatureInDB(self, week, songId):
        """Return True when a feature document exists for ``week``/``songId``."""
        return self.db.feature.find_one({'week': week, 'id': songId}) is not None

    def getFeatureListByWeek(self, week):
        """Return all feature documents stored for ``week`` as a list."""
        return list(self.db.feature.find({'week': week}))

    def getAllFeatureListBySong(self, songId):
        """Return all feature documents stored for ``songId`` as a list."""
        return list(self.db.feature.find({'id': songId}))

    def getFeatureVectorByWeekAndSongId(self, week, songId):
        """Return the feature document for ``week`` and ``songId``, or None."""
        return self.db.feature.find_one({'week': week, 'id': songId})
# -*- coding: utf-8 -*-
"""
Created on Sat May 28 22:04:26 2016

@author: Matts42
"""

from AlchemyAPI import AlchemyAPI
from nltk.corpus.reader.plaintext import PlaintextCorpusReader

# Module-level AlchemyAPI client.
alchemyapi = AlchemyAPI()

# Plain-text corpus rooted at the newcorpus/ directory; the '.*' file-id
# pattern is meant to select every file in it.
newcorpus = PlaintextCorpusReader('newcorpus/', '.*')