def extractFeatureToDB(self, beginWeek, endWeek=None, isReload=False, useAlchemyAPI=False):
    """Build one feature row per (week, song) and insert it through DBController.

    For every week between ``beginWeek`` and ``endWeek`` (both first passed
    through ``dateToSaturday``), the songs returned by
    ``db.getSongIdListByWeek`` for the *preceding* week are processed. The
    sales, radio and streaming ranks are read for the preceding week; the
    IMVDB, MTV review, YouTube and Twitter values and the Top-50 rank are
    read for the week itself.

    Args:
        beginWeek: datetime of the first week to process; must not be
            earlier than 2007-01-07.
        endWeek: datetime of the last week to process; must not be in the
            future. ``None`` (the default) means "now", resolved at call
            time.
        isReload: when falsy, (week, song) pairs for which
            ``db.isFeatureInDB`` returns true are skipped; when truthy,
            every pair is recomputed and inserted again.
        useAlchemyAPI: forwarded unchanged to the MTV, YouTube and Twitter
            data getters.

    Raises:
        ValueError: if ``beginWeek`` is before 2007-01-07 or ``endWeek`` is
            later than the current time.
    """
    # A default of ``datetime.today()`` in the signature would be evaluated
    # only once, when the function is defined, and go stale in a long-running
    # process. Resolve "now" per call and reuse it for both date checks.
    now = datetime.today()
    if endWeek is None:
        endWeek = now
    if beginWeek < datetime(2007, 1, 7) or endWeek > now:
        raise ValueError('Invalid input date!')

    beginWeek, endWeek = dateToSaturday(beginWeek), dateToSaturday(endWeek)
    # The normalised end date may land after "now"; if so, step back a week.
    if endWeek > now:
        endWeek = endWeek - timedelta(days=7)

    oneWeek = timedelta(days=7)
    db = DBController()
    iterWeek = beginWeek
    while iterWeek <= endWeek:
        lastWeek = iterWeek - oneWeek
        for songId in db.getSongIdListByWeek(lastWeek):
            if not isReload and db.isFeatureInDB(iterWeek, songId):
                continue

            featureDict = {
                'id': songId,
                'week': iterWeek,
                # Chart-component ranks come from the preceding week.
                'sales': db.getSalesRank(lastWeek, songId),
                'radio': db.getRadioRank(lastWeek, songId),
                'streaming': db.getStreamingRank(lastWeek, songId),
            }
            featureDict['MVView'], featureDict['MVSocialInteraction'] = \
                db.getIMVDBData(iterWeek, songId)
            featureDict['MTVReviewCount'], featureDict['MTVReviewScore'] = \
                db.getMTVReviewData(iterWeek, songId, useAlchemyAPI)
            featureDict['youtubeCommentCount'], featureDict['youtubeCommentScore'] = \
                db.getYoutubeData(iterWeek, songId, useAlchemyAPI)
            featureDict['twitterCount'], featureDict['twitterScore'] = \
                db.getTwitterData(iterWeek, songId, useAlchemyAPI)
            # The rank of the week being described is stored with the row.
            featureDict['rank'] = db.getTop50Rank(iterWeek, songId)
            db.insertFeatureToDB(featureDict)
        iterWeek += oneWeek
def computeBaseLine(self, baselineType=0):
    """Print the average score of a "same as last week" baseline.

    For each week from 2013-03-23 to 2013-04-20 (stepping one week at a
    time), every feature vector returned by ``db.getFeatureListByWeek`` gets
    a predicted score derived from the song's Top-50 rank of the preceding
    week (0 when that rank is ``None``) and a target score derived from the
    vector's own ``"rank"`` (falling back to the predicted score when it is
    ``None``). Both lists are passed through ``self.getRankArray`` and then
    scored; the per-week scores are averaged and printed.

    Args:
        baselineType: selects the per-week score:
            0 -> ``self.getRankEvalationScore``,
            1 -> ``metrics.r2_score``,
            anything else -> ``metrics.mean_squared_error``.
    """
    iterWeek, endWeek = datetime(2013, 3, 23), datetime(2013, 4, 20)
    oneWeek = timedelta(weeks=1)
    db = DBController()
    fg = FeatureGenerator()
    baselineScore = 0
    weekCount = 0
    while iterWeek <= endWeek:
        lastWeek = iterWeek - oneWeek
        y_pred, y_test = [], []
        for featureVector in db.getFeatureListByWeek(iterWeek):
            lastWeekRank = db.getTop50Rank(lastWeek, featureVector["id"])
            lastWeekScore = 0 if lastWeekRank is None else fg.rankToPopScore(lastWeekRank)

            currentWeekRank = featureVector["rank"]
            if currentWeekRank is None:
                # No rank this week: reuse the prediction as the target.
                currentWeekScore = lastWeekScore
            else:
                currentWeekScore = fg.rankToPopScore(currentWeekRank)

            y_pred.append(lastWeekScore)
            y_test.append(currentWeekScore)

        y_pred = self.getRankArray(numpy.asarray(y_pred))
        y_test = self.getRankArray(numpy.asarray(y_test))

        if baselineType == 0:
            baselineScore += self.getRankEvalationScore(y_pred, y_test)
        elif baselineType == 1:
            # NOTE(review): assumes ``metrics`` is sklearn.metrics, whose
            # signature is (y_true, y_pred). R^2 is not symmetric, so the
            # ground truth must come first.
            baselineScore += metrics.r2_score(y_test, y_pred)
        else:
            # MSE is symmetric; same argument order kept for consistency.
            baselineScore += metrics.mean_squared_error(y_test, y_pred)

        weekCount += 1
        iterWeek += oneWeek

    # Average over the weeks actually visited (5 for the range above) instead
    # of a hard-coded divisor; float division avoids truncation of integer
    # scores under Python 2.
    baselineScore = baselineScore / float(weekCount)
    print(baselineScore)