def extractFeatureToDB(self, beginWeek, endWeek=None, isReload=False, useAlchemyAPI=False):
     """Build one feature row per (week, song) and store it through DBController.

     For every week between ``beginWeek`` and ``endWeek`` (stepping 7 days),
     the songs returned by ``getSongIdListByWeek`` for the *previous* week are
     processed. The sales/radio/streaming ranks are read for the previous
     week; the IMVDB, MTV review, YouTube, Twitter and top-50 rank values are
     read for the current week.

     Args:
         beginWeek: datetime of the first week to process; must not be
             earlier than 2007-01-07.
         endWeek: datetime of the last week to process; must not be in the
             future. ``None`` (the default) means "now", resolved when the
             method is called rather than when the module is imported.
         isReload: when falsy, (week, song) pairs for which
             ``isFeatureInDB`` is true are skipped; when truthy every pair
             is recomputed and inserted again.
         useAlchemyAPI: forwarded unchanged to the MTV review, YouTube and
             Twitter getters.

     Raises:
         ValueError: if ``beginWeek`` is before 2007-01-07 or ``endWeek`` is
             later than the current time.
     """
     # Take one snapshot of "now" so validation and clamping agree.
     now = datetime.today()
     if endWeek is None:
         endWeek = now
     if beginWeek < datetime(2007, 1, 7) or endWeek > now:
         raise ValueError('Invalid input date!')

     beginWeek, endWeek = dateToSaturday(beginWeek), dateToSaturday(endWeek)
     # NOTE(review): dateToSaturday presumably can move a date forward to the
     # coming Saturday; if that lands in the future, step back one week.
     if endWeek > now:
         endWeek = endWeek - timedelta(days=7)

     oneWeek = timedelta(days=7)
     db = DBController()
     iterWeek = beginWeek
     while iterWeek <= endWeek:
         lastWeek = iterWeek - oneWeek
         # Candidate songs are the ones listed for the previous week.
         for songId in db.getSongIdListByWeek(lastWeek):
             if not isReload and db.isFeatureInDB(iterWeek, songId):
                 continue
             featureDict = {
                 'id': songId,
                 'week': iterWeek,
                 # Chart ranks come from the previous week.
                 'sales': db.getSalesRank(lastWeek, songId),
                 'radio': db.getRadioRank(lastWeek, songId),
                 'streaming': db.getStreamingRank(lastWeek, songId),
             }
             featureDict['MVView'], featureDict['MVSocialInteraction'] = \
                 db.getIMVDBData(iterWeek, songId)
             featureDict['MTVReviewCount'], featureDict['MTVReviewScore'] = \
                 db.getMTVReviewData(iterWeek, songId, useAlchemyAPI)
             featureDict['youtubeCommentCount'], featureDict['youtubeCommentScore'] = \
                 db.getYoutubeData(iterWeek, songId, useAlchemyAPI)
             featureDict['twitterCount'], featureDict['twitterScore'] = \
                 db.getTwitterData(iterWeek, songId, useAlchemyAPI)
             # The top-50 rank of the current week is stored under 'rank'.
             featureDict['rank'] = db.getTop50Rank(iterWeek, songId)
             db.insertFeatureToDB(featureDict)
         iterWeek += oneWeek
 def computeBaseLine(self, baselineType=0):
     """Score the "same as last week" baseline over a fixed evaluation window.

     For each week from 2013-03-23 through 2013-04-20 (weekly steps), every
     feature vector returned by ``getFeatureListByWeek`` is given a
     prediction equal to the popularity score of the song's top-50 rank in
     the previous week (0 when that rank is None). The target is the
     popularity score of the vector's own ``"rank"``; when that rank is None
     the previous-week score is used as the target as well. Both lists go
     through ``self.getRankArray`` before being scored.

     Args:
         baselineType: 0 scores with ``self.getRankEvalationScore``, 1 with
             ``metrics.r2_score``, any other value with
             ``metrics.mean_squared_error``.

     Returns:
         The per-week score averaged over the evaluated weeks. The value is
         also printed.
     """
     iterWeek, endWeek = datetime(2013, 3, 23), datetime(2013, 4, 20)
     oneWeek = timedelta(weeks=1)
     db = DBController()
     fg = FeatureGenerator()
     # Float accumulator so the final average is never an integer division.
     baselineScore = 0.0
     weekCount = 0
     while iterWeek <= endWeek:
         lastWeek = iterWeek - oneWeek
         y_pred, y_test = [], []
         for featureVector in db.getFeatureListByWeek(iterWeek):
             lastWeekRank = db.getTop50Rank(lastWeek, featureVector["id"])
             if lastWeekRank is None:
                 lastWeekScore = 0
             else:
                 lastWeekScore = fg.rankToPopScore(lastWeekRank)
             currentWeekRank = featureVector["rank"]
             if currentWeekRank is None:
                 # No rank for this week: target falls back to the prediction.
                 currentWeekScore = lastWeekScore
             else:
                 currentWeekScore = fg.rankToPopScore(currentWeekRank)
             y_pred.append(lastWeekScore)
             y_test.append(currentWeekScore)
         y_pred = self.getRankArray(numpy.asarray(y_pred))
         y_test = self.getRankArray(numpy.asarray(y_test))
         if baselineType == 0:
             baselineScore += self.getRankEvalationScore(y_pred, y_test)
         elif baselineType == 1:
             # NOTE(review): assumes `metrics` is sklearn.metrics, whose
             # signature is (y_true, y_pred). R^2 is not symmetric, so the
             # ground truth has to be passed first.
             baselineScore += metrics.r2_score(y_test, y_pred)
         else:
             baselineScore += metrics.mean_squared_error(y_test, y_pred)
         weekCount += 1
         iterWeek += oneWeek
     # Average over the weeks actually evaluated instead of a literal 5.
     baselineScore = baselineScore / weekCount
     print(baselineScore)
     return baselineScore