def test_picbelowthreshold(self):
    """Only the picture whose final score is strictly below the
    threshold gets filtered out; scores at or above it survive.

    Sets three happy pictures to threshold, threshold-1 and
    threshold+1, runs the filter, and checks exactly one Score row
    (the below-threshold one) was removed.
    """
    medias = Media.objects.filter(moods=Mood.HAPPY, content_type=Media.PICTURE)
    m1 = medias[0]
    m2 = medias[len(medias) - 1]
    m3 = medias[1]
    s1 = Score.objects.get(media=m1)
    s2 = Score.objects.get(media=m2)
    s3 = Score.objects.get(media=m3)
    s1.final_score = ScoreFilter.THRESHOLD
    s2.final_score = ScoreFilter.THRESHOLD - 1
    s3.final_score = ScoreFilter.THRESHOLD + 1
    s1.save()
    s2.save()
    s3.save()
    # FIX: assertEqual instead of assertTrue(x == 1) so a failure
    # reports the actual filter count instead of just "False".
    self.assertEqual(1, ScoreFilter.filter(Mood.HAPPY, Media.PICTURE))  # filtered one
    m1_arr = Score.objects.filter(media=m1)
    m2_arr = Score.objects.filter(media=m2)
    m3_arr = Score.objects.filter(media=m3)
    self.assertEqual(1, len(m1_arr))   # at-threshold score kept
    self.assertEqual(0, len(m2_arr))   # below-threshold score removed
    self.assertEqual(1, len(m3_arr))   # above-threshold score kept
def test_emptyDatabase(self):
    """filter() must report EMPTY_DB once every content table is cleared."""
    # Wipe all content-related tables so the filter sees nothing at all.
    for model in (Media, Picture, Rank, Score):
        model.objects.all().delete()
    self.assertEqual(ScoreFilter.EMPTY_DB,
                     ScoreFilter.filter(Mood.HAPPY, Media.PICTURE))
def scoreThresholdFilterCronJob(mood = Mood.HAPPY, content = Media.PICTURE):
    """Cron-job entry point: apply the score-threshold filter.

    Thin wrapper around ScoreFilter.filter; defaults to happy pictures.
    Returns whatever ScoreFilter.filter returns.
    """
    result = ScoreFilter.filter(mood, content)
    return result
def pullAndFilter(mood, terms, add_num, partition_num):
    """Search Flickr for pictures matching *terms*, score them for *mood*,
    and fold the best candidates into the database.

    Up to ``partition_num`` of the ``_SEARCH_NUM`` search results are
    examined, starting at a random offset.  At most ``add_num`` new
    pictures are added outright; any remaining candidates then replace
    existing media whose final score they beat.

    Returns the total number of pictures written (unconditional adds
    plus replacements).
    """
    # get pics
    pics = flickr.photos_search(tags=terms,
                                safe_search=1,
                                per_page=_SEARCH_NUM)
    # calculate how many pictures to do: clamp to [0, length]
    length = len(pics[0])
    myLen = length if partition_num > length else partition_num
    myLen = max(myLen, 0)
    # generate a random index (window start into the search results)
    startIndex = random.randint(0, max(length - myLen, 0))
    # generate map for keeping track; shuffled so candidates are visited
    # in random order (Python 2 range() returns a list, so shuffle works)
    picture_map = range(0, myLen)
    random.shuffle(picture_map)
    added_picture_list = []
    # looping myLen times, get out n number of pictures
    for i in range(0, myLen):
        first_attrib = None
        photo_id = 0
        print 'picture progress: ' + str(i) + '/' + str(myLen)
        # Walk candidates until we find a photo that is alive on Flickr
        # and not already in (or previously deleted from) our database.
        while len(picture_map) != 0:
            idx = (picture_map.pop() + startIndex) % length
            first_attrib = pics[0][idx].attrib
            photo_id = int(first_attrib['id'])
            initial_score = computePictureScore(photo_id, mood)
            # skip anything we deliberately deleted before
            del_list = Deleted.objects.filter(content_type=Media.PICTURE,
                                              content_id=photo_id)
            if len(del_list) != 0:
                continue
            try:
                # throw error if bad things happen (photo unavailable)
                flickr.photos_getInfo(photo_id=photo_id)
            except Exception:
                continue
            try:
                # throw error (DoesNotExist) when the photo is new to us;
                # that exception is the "found a fresh photo" exit below
                Picture.objects.get(flickr_id=photo_id)
                # photo id already exist in database; when it was the
                # last candidate, force the break via an artificial raise
                if len(picture_map) == 0:
                    raise Exception('I FAILED!!')
            except Exception:
                break
        # NOTE(review): if the candidate pool is exhausted, first_attrib
        # still holds the last *rejected* photo's attribs; this relies on
        # _getURL raising for such attribs — confirm against _getURL.
        try:
            url = _getURL(first_attrib)
            added_picture_list.append((photo_id, url, mood, initial_score))
        except Exception:
            print 'The None type url error again, die gracefully'
    # best-scoring candidates first (NOTE: lambda param shadows builtin
    # `tuple`; kept as-is here)
    sorted_picture_list = sorted(added_picture_list,
                                 key=lambda tuple: tuple[3],
                                 reverse=True)
    # add till database is full
    preadd = min(add_num, len(sorted_picture_list))
    for i in range(0, preadd):
        tuple = sorted_picture_list.pop(0)
        assert Picture.add(tuple[0], tuple[1], tuple[2], initialScore=tuple[3])
    # get media data and recompute final scores for this mood
    medias = Media.objects.filter(moods=mood, content_type=Media.PICTURE)
    ScoreFilter._calculateFinalScore(medias)
    # existing media with a real final score, worst first
    mediaData = Media.objects.filter(moods=mood,
                                     score__final_score__gt=-1).order_by('score__final_score')
    if len(mediaData) == 0:
        return preadd
    # Add to database if score is higher than existing scores: walk the
    # worst-ranked existing media, swapping in better leftover candidates
    # until a candidate fails to beat the incumbent.
    lowestMediaIndex = 0
    while len(sorted_picture_list) != 0:
        tuple = sorted_picture_list.pop(0)
        score = tuple[3]
        if score > mediaData[lowestMediaIndex].score.final_score:
            # "destory" [sic] — project helper that removes the media row
            destory(mediaData[lowestMediaIndex].id, Media.PICTURE)
            assert Picture.add(tuple[0], tuple[1], tuple[2], initialScore=tuple[3])
            lowestMediaIndex += 1
            if lowestMediaIndex >= len(mediaData):
                break
        else:
            break
    return preadd + lowestMediaIndex
def test_noPictureHasFinal(self):
    """When no picture carries a final score, filter() flags NO_FINAL_SCORE."""
    outcome = ScoreFilter.filter(Mood.HAPPY, Media.PICTURE)
    self.assertEqual(ScoreFilter.NO_FINAL_SCORE, outcome)
def pullAndFilter(mood, terms, add_num, partition_num): # get _SEARCH_NUM of video from Youtube entries = _getEntries(yt, terms, _SEARCH_NUM) #calculate how many video to do length = len(entries) myLen = length if partition_num > length else partition_num myLen = max(myLen, 0) #generate a random index startIndex = random.randint(0, max(length - myLen, 0)) startIndex = 149 # map to decide index video_map = range(0, myLen) # the video added list added_video_list = [] for i in range(0, myLen): vid_id = None print 'video progress: ' + str(i) + '/' + str(myLen) while len(video_map) != 0: # grab picture idx = (video_map.pop() + startIndex) % length entry = entries[idx] entryid = _parseId(entry) #compute score initial_score = computeVideoScore(entry, mood) #check if data is deleted before del_list = Deleted.objects.filter(content_type=Media.VIDEO, content_id=entryid) if len(del_list) != 0: continue #check if valid data try: Video.objects.get(youtube_id=entryid) if len(video_map) == 0: # fail gracefully print 'Max out video search results' except Exception: vid_id = entryid break # add to list try: url = _getURL(vid_id) added_video_list.append((vid_id, url, mood, initial_score)) except Exception: print 'The None type url error again, die gracefully' # sort by score sorted_video_list = sorted(added_video_list, key=lambda tuple: tuple[3], reverse=True) # Add to our database the number of data needed preadd = min(add_num, len(sorted_video_list)) for i in range(0, preadd): tuple = sorted_video_list.pop(0) if tuple[0] != None: assert Video.add(tuple[0], tuple[1], tuple[2], initialScore=tuple[3]) medias = Media.objects.filter(moods=mood, content_type=Media.VIDEO) ScoreFilter._calculateFinalScore(medias) #mediaData = Media.objects.filter(moods=mood, score__final_score__gt=-1).order_by('score__final_score') mediaData = Media.objects.filter(moods=mood, score__final_score__gt=-1).order_by('score__final_score') # allData = Media.objects.filter(moods=mood, content_type=Media.VIDEO) # 
dataDict = {} # for m in allData: # if m.score.final_score == -1: # dataDict[m] = m.score.initial_score # else: # dataDict[m] = m.score.final_score # # for key, value in sorted(dataDict.iteritems(), key=lambda (k,v): (v,k)): # mediaData.append(key) if len(mediaData) == 0: return preadd # Add to database if score is higher than existing scores lowestMediaIndex = 0 while len(sorted_video_list) != 0: tuple = sorted_video_list.pop(0) score = tuple[3] if score > mediaData[lowestMediaIndex].score.final_score: destory(mediaData[lowestMediaIndex].id, Media.VIDEO) assert Video.add(tuple[0], tuple[1], tuple[2], initialScore=tuple[3]) lowestMediaIndex += 1 if lowestMediaIndex >= len(mediaData): break else: break return preadd + lowestMediaIndex
def scoreThresholdFilterCronJob(mood=Mood.HAPPY, content=Media.PICTURE):
    """Scheduled-task wrapper that delegates to ScoreFilter.filter.

    By default it filters pictures tagged with the happy mood and
    passes the filter's return value straight back to the scheduler.
    """
    return ScoreFilter.filter(mood, content)
def pullAndFilter(mood, terms, add_num, partition_num):
    """Search Flickr for pictures matching *terms*, score them for *mood*,
    and fold the best candidates into the database.

    Up to ``partition_num`` of the ``_SEARCH_NUM`` search results are
    examined, starting at a random offset.  At most ``add_num`` new
    pictures are added outright; any remaining candidates then replace
    existing media whose final score they beat.

    Returns the total number of pictures written (unconditional adds
    plus replacements).
    """
    # get pics
    pics = flickr.photos_search(tags=terms,
                                safe_search=1,
                                per_page=_SEARCH_NUM)
    # calculate how many pictures to do: clamp to [0, length]
    length = len(pics[0])
    myLen = length if partition_num > length else partition_num
    myLen = max(myLen, 0)
    # generate a random index (window start into the search results)
    startIndex = random.randint(0, max(length - myLen, 0))
    # generate map for keeping track; shuffled so candidates are visited
    # in random order (Python 2 range() returns a list, so shuffle works)
    picture_map = range(0, myLen)
    random.shuffle(picture_map)
    added_picture_list = []
    # looping myLen times, get out n number of pictures
    for i in range(0, myLen):
        first_attrib = None
        photo_id = 0
        print 'picture progress: ' + str(i) + '/' + str(myLen)
        # Walk candidates until we find a photo that is alive on Flickr
        # and not already in (or previously deleted from) our database.
        while len(picture_map) != 0:
            idx = (picture_map.pop() + startIndex) % length
            first_attrib = pics[0][idx].attrib
            photo_id = int(first_attrib['id'])
            initial_score = computePictureScore(photo_id, mood)
            # skip anything we deliberately deleted before
            del_list = Deleted.objects.filter(content_type=Media.PICTURE,
                                              content_id=photo_id)
            if len(del_list) != 0:
                continue
            try:
                # throw error if bad things happen (photo unavailable)
                flickr.photos_getInfo(photo_id=photo_id)
            except Exception:
                continue
            try:
                # throw error (DoesNotExist) when the photo is new to us;
                # that exception is the "found a fresh photo" exit below
                Picture.objects.get(flickr_id=photo_id)
                # photo id already exist in database; when it was the
                # last candidate, force the break via an artificial raise
                if len(picture_map) == 0:
                    raise Exception('I FAILED!!')
            except Exception:
                break
        # NOTE(review): if the candidate pool is exhausted, first_attrib
        # still holds the last *rejected* photo's attribs; this relies on
        # _getURL raising for such attribs — confirm against _getURL.
        try:
            url = _getURL(first_attrib)
            added_picture_list.append((photo_id, url, mood, initial_score))
        except Exception:
            print 'The None type url error again, die gracefully'
    # best-scoring candidates first (NOTE: lambda param shadows builtin
    # `tuple`; kept as-is here)
    sorted_picture_list = sorted(added_picture_list,
                                 key=lambda tuple: tuple[3],
                                 reverse=True)
    # add till database is full
    preadd = min(add_num, len(sorted_picture_list))
    for i in range(0, preadd):
        tuple = sorted_picture_list.pop(0)
        assert Picture.add(tuple[0], tuple[1], tuple[2], initialScore=tuple[3])
    # get media data and recompute final scores for this mood
    medias = Media.objects.filter(moods=mood, content_type=Media.PICTURE)
    ScoreFilter._calculateFinalScore(medias)
    # existing media with a real final score, worst first
    mediaData = Media.objects.filter(
        moods=mood, score__final_score__gt=-1).order_by('score__final_score')
    if len(mediaData) == 0:
        return preadd
    # Add to database if score is higher than existing scores: walk the
    # worst-ranked existing media, swapping in better leftover candidates
    # until a candidate fails to beat the incumbent.
    lowestMediaIndex = 0
    while len(sorted_picture_list) != 0:
        tuple = sorted_picture_list.pop(0)
        score = tuple[3]
        if score > mediaData[lowestMediaIndex].score.final_score:
            # "destory" [sic] — project helper that removes the media row
            destory(mediaData[lowestMediaIndex].id, Media.PICTURE)
            assert Picture.add(tuple[0], tuple[1], tuple[2], initialScore=tuple[3])
            lowestMediaIndex += 1
            if lowestMediaIndex >= len(mediaData):
                break
        else:
            break
    return preadd + lowestMediaIndex