Example #1
0
 def test_picbelowthreshold(self):
     """Only the picture scored below the threshold is removed by the filter.

     Three happy pictures get final scores of exactly THRESHOLD,
     THRESHOLD - 1 and THRESHOLD + 1.  ScoreFilter.filter is expected to
     report one filtered item, after which only the below-threshold
     picture has no Score row left.
     """
     # Evaluate the queryset exactly once: indexing an unevaluated queryset
     # issues its own query, and without an explicit ordering two queries
     # are not guaranteed to return rows in the same order.
     medias = list(Media.objects.filter(moods=Mood.HAPPY,
                                        content_type=Media.PICTURE))
     m1 = medias[0]
     m2 = medias[-1]
     m3 = medias[1]

     s1 = Score.objects.get(media=m1)
     s2 = Score.objects.get(media=m2)
     s3 = Score.objects.get(media=m3)

     s1.final_score = ScoreFilter.THRESHOLD
     s2.final_score = ScoreFilter.THRESHOLD - 1
     s3.final_score = ScoreFilter.THRESHOLD + 1
     s1.save()
     s2.save()
     s3.save()

     # assertEqual reports the actual value on failure, unlike
     # assertTrue(x == 1).
     self.assertEqual(1, ScoreFilter.filter(Mood.HAPPY, Media.PICTURE))  # filtered one

     m1_arr = Score.objects.filter(media=m1)
     m2_arr = Score.objects.filter(media=m2)
     m3_arr = Score.objects.filter(media=m3)

     self.assertEqual(1, len(m1_arr))
     self.assertEqual(0, len(m2_arr))
     self.assertEqual(1, len(m3_arr))
Example #2
0
 def test_emptyDatabase(self):
     """With every table emptied, the filter reports ScoreFilter.EMPTY_DB."""
     # Wipe the tables in the same order as before: Media, Picture, Rank, Score.
     for model in (Media, Picture, Rank, Score):
         model.objects.all().delete()

     outcome = ScoreFilter.filter(Mood.HAPPY, Media.PICTURE)
     self.assertEqual(ScoreFilter.EMPTY_DB, outcome)
 def scoreThresholdFilterCronJob(mood = Mood.HAPPY, content = Media.PICTURE):
     """Run ScoreFilter.filter for the given mood and content type.

     Returns whatever ScoreFilter.filter returns.
     """
     outcome = ScoreFilter.filter(mood, content)
     return outcome
def pullAndFilter(mood, terms, add_num, partition_num):
    """Pull pictures from Flickr for ``terms`` and store the best-scoring ones.

    Up to ``partition_num`` search results are examined in random order.  A
    result is kept only if it has no ``Deleted`` record, still resolves
    through ``flickr.photos_getInfo``, is not already stored as a ``Picture``
    and yields a URL.  Kept pictures are scored and sorted best first: the
    top ``add_num`` are added outright, and each remaining one replaces the
    lowest-scored existing picture for ``mood`` while it scores higher.

    Returns ``min(add_num, number of kept pictures)`` plus the number of
    existing pictures that were replaced.

    Raises AssertionError if ``Picture.add`` returns a falsy value.
    """
    def _store(entry):
        # Explicit check instead of ``assert``: an assert statement is removed
        # under ``python -O``, which would silently skip the add itself.
        if not Picture.add(entry[0], entry[1], entry[2], initialScore=entry[3]):
            raise AssertionError('Picture.add failed for flickr id ' + str(entry[0]))

    # get pics
    pics = flickr.photos_search(tags=terms,
                                safe_search=1,
                                per_page=_SEARCH_NUM)
    photos = pics[0]

    # calculate how many pictures to do
    length = len(photos)
    myLen = max(min(partition_num, length), 0)

    # generate a random index
    startIndex = random.randint(0, max(length - myLen, 0))

    # shuffled offsets still to be examined; shared by every round below
    picture_map = list(range(myLen))
    random.shuffle(picture_map)

    added_picture_list = []

    # looping myLen times, get out n number of pictures
    for i in range(myLen):
        print('picture progress: ' + str(i) + '/' + str(myLen))

        # Only a candidate that passes every check is selected; running out
        # of candidates must not fall through with the last rejected one.
        selected_attrib = None
        selected_id = None
        while picture_map:
            idx = (picture_map.pop() + startIndex) % length
            attrib = photos[idx].attrib
            photo_id = int(attrib['id'])

            # skip pictures that were deleted before
            del_list = Deleted.objects.filter(content_type=Media.PICTURE,
                                              content_id=photo_id)
            if len(del_list) != 0:
                continue

            try:
                # throw error if bad things happen
                flickr.photos_getInfo(photo_id=photo_id)
            except Exception:
                continue

            # skip pictures that are already in the database
            if len(Picture.objects.filter(flickr_id=photo_id)) != 0:
                continue

            selected_attrib = attrib
            selected_id = photo_id
            break

        if selected_attrib is None:
            # every remaining search result was rejected; nothing left to try
            break

        try:
            url = _getURL(selected_attrib)
        except Exception:
            print('The None type url error again, die gracefully')
            continue

        # score only the pictures that will actually be queued
        initial_score = computePictureScore(selected_id, mood)
        added_picture_list.append((selected_id, url, mood, initial_score))

    sorted_picture_list = sorted(added_picture_list,
                                 key=lambda entry: entry[3],
                                 reverse=True)

    # add till database is full
    preadd = min(add_num, len(sorted_picture_list))
    for _ in range(preadd):
        _store(sorted_picture_list.pop(0))

    # get media data
    medias = Media.objects.filter(moods=mood, content_type=Media.PICTURE)
    ScoreFilter._calculateFinalScore(medias)

    # Restrict to pictures: the entries below are destroyed as Media.PICTURE.
    # Materialised once so the indices keep pointing at the original
    # lowest-first ordering while rows are destroyed and added.
    mediaData = list(Media.objects.filter(
        moods=mood,
        content_type=Media.PICTURE,
        score__final_score__gt=-1).order_by('score__final_score'))

    if len(mediaData) == 0:
        return preadd

    # Add to database if score is higher than existing scores
    lowestMediaIndex = 0
    while sorted_picture_list:
        entry = sorted_picture_list.pop(0)
        if entry[3] > mediaData[lowestMediaIndex].score.final_score:
            destory(mediaData[lowestMediaIndex].id, Media.PICTURE)
            _store(entry)

            lowestMediaIndex += 1
            if lowestMediaIndex >= len(mediaData):
                break
        else:
            # candidates are sorted best first, so no later one can win either
            break

    return preadd + lowestMediaIndex
Example #5
0
 def test_noPictureHasFinal(self):
     """The filter is expected to report ScoreFilter.NO_FINAL_SCORE here."""
     outcome = ScoreFilter.filter(Mood.HAPPY, Media.PICTURE)
     self.assertEqual(ScoreFilter.NO_FINAL_SCORE, outcome)
Example #6
0
def pullAndFilter(mood, terms, add_num, partition_num):
    """Pull videos from YouTube for ``terms`` and store the best-scoring ones.

    Up to ``partition_num`` search entries are examined.  An entry is kept
    only if it has no ``Deleted`` record, is not already stored as a
    ``Video`` and yields a URL.  Kept videos are scored and sorted best
    first: the top ``add_num`` are added outright, and each remaining one
    replaces the lowest-scored existing video for ``mood`` while it scores
    higher.

    Returns ``min(add_num, number of kept videos)`` plus the number of
    existing videos that were replaced.

    Raises AssertionError if ``Video.add`` returns a falsy value.
    """
    def _store(entry):
        # Explicit check instead of ``assert``: an assert statement is removed
        # under ``python -O``, which would silently skip the add itself.
        if not Video.add(entry[0], entry[1], entry[2], initialScore=entry[3]):
            raise AssertionError('Video.add failed for youtube id ' + str(entry[0]))

    # get _SEARCH_NUM of video from Youtube
    entries = _getEntries(yt, terms, _SEARCH_NUM)

    # calculate how many video to do
    length = len(entries)
    myLen = max(min(partition_num, length), 0)

    # generate a random index (the former hard-coded override of this value
    # was a debugging leftover and has been removed)
    startIndex = random.randint(0, max(length - myLen, 0))

    # offsets still to be examined; shared by every round below
    video_map = list(range(myLen))

    # the video added list
    added_video_list = []

    for i in range(myLen):
        print('video progress: ' + str(i) + '/' + str(myLen))

        vid_id = None
        vid_entry = None
        while video_map:
            # grab video
            idx = (video_map.pop() + startIndex) % length
            entry = entries[idx]
            entryid = _parseId(entry)

            # check if data is deleted before
            del_list = Deleted.objects.filter(content_type=Media.VIDEO,
                                              content_id=entryid)
            if len(del_list) != 0:
                continue

            # skip videos that are already in the database
            if len(Video.objects.filter(youtube_id=entryid)) != 0:
                if not video_map:
                    # fail gracefully
                    print('Max out video search results')
                continue

            vid_id = entryid
            vid_entry = entry
            break

        if vid_id is None:
            # every remaining search result was rejected; nothing left to try
            break

        # add to list
        try:
            url = _getURL(vid_id)
        except Exception:
            print('The None type url error again, die gracefully')
            continue

        # score only the videos that will actually be queued
        initial_score = computeVideoScore(vid_entry, mood)
        added_video_list.append((vid_id, url, mood, initial_score))

    # sort by score
    sorted_video_list = sorted(added_video_list,
                               key=lambda entry: entry[3],
                               reverse=True)

    # Add to our database the number of data needed
    preadd = min(add_num, len(sorted_video_list))
    for _ in range(preadd):
        _store(sorted_video_list.pop(0))

    medias = Media.objects.filter(moods=mood, content_type=Media.VIDEO)
    ScoreFilter._calculateFinalScore(medias)

    # Restrict to videos: the entries below are destroyed as Media.VIDEO.
    # Materialised once so the indices keep pointing at the original
    # lowest-first ordering while rows are destroyed and added.
    mediaData = list(Media.objects.filter(
        moods=mood,
        content_type=Media.VIDEO,
        score__final_score__gt=-1).order_by('score__final_score'))

    if len(mediaData) == 0:
        return preadd

    # Add to database if score is higher than existing scores
    lowestMediaIndex = 0
    while sorted_video_list:
        entry = sorted_video_list.pop(0)
        if entry[3] > mediaData[lowestMediaIndex].score.final_score:
            destory(mediaData[lowestMediaIndex].id, Media.VIDEO)
            _store(entry)

            lowestMediaIndex += 1
            if lowestMediaIndex >= len(mediaData):
                break
        else:
            # candidates are sorted best first, so no later one can win either
            break

    return preadd + lowestMediaIndex
 def scoreThresholdFilterCronJob(mood=Mood.HAPPY, content=Media.PICTURE):
     """Run ScoreFilter.filter for the given mood and content type.

     Returns whatever ScoreFilter.filter returns.
     """
     outcome = ScoreFilter.filter(mood, content)
     return outcome
def pullAndFilter(mood, terms, add_num, partition_num):
    """Pull pictures from Flickr for ``terms`` and store the best-scoring ones.

    Up to ``partition_num`` search results are examined in random order.  A
    result is kept only if it has no ``Deleted`` record, still resolves
    through ``flickr.photos_getInfo``, is not already stored as a ``Picture``
    and yields a URL.  Kept pictures are scored and sorted best first: the
    top ``add_num`` are added outright, and each remaining one replaces the
    lowest-scored existing picture for ``mood`` while it scores higher.

    Returns ``min(add_num, number of kept pictures)`` plus the number of
    existing pictures that were replaced.

    Raises AssertionError if ``Picture.add`` returns a falsy value.
    """
    def _store(entry):
        # Explicit check instead of ``assert``: an assert statement is removed
        # under ``python -O``, which would silently skip the add itself.
        if not Picture.add(entry[0], entry[1], entry[2], initialScore=entry[3]):
            raise AssertionError('Picture.add failed for flickr id ' + str(entry[0]))

    # get pics
    pics = flickr.photos_search(tags=terms,
                                safe_search=1,
                                per_page=_SEARCH_NUM)
    photos = pics[0]

    # calculate how many pictures to do
    length = len(photos)
    myLen = max(min(partition_num, length), 0)

    # generate a random index
    startIndex = random.randint(0, max(length - myLen, 0))

    # shuffled offsets still to be examined; shared by every round below
    picture_map = list(range(myLen))
    random.shuffle(picture_map)

    added_picture_list = []

    # looping myLen times, get out n number of pictures
    for i in range(myLen):
        print('picture progress: ' + str(i) + '/' + str(myLen))

        # Only a candidate that passes every check is selected; running out
        # of candidates must not fall through with the last rejected one.
        selected_attrib = None
        selected_id = None
        while picture_map:
            idx = (picture_map.pop() + startIndex) % length
            attrib = photos[idx].attrib
            photo_id = int(attrib['id'])

            # skip pictures that were deleted before
            del_list = Deleted.objects.filter(content_type=Media.PICTURE,
                                              content_id=photo_id)
            if len(del_list) != 0:
                continue

            try:
                # throw error if bad things happen
                flickr.photos_getInfo(photo_id=photo_id)
            except Exception:
                continue

            # skip pictures that are already in the database
            if len(Picture.objects.filter(flickr_id=photo_id)) != 0:
                continue

            selected_attrib = attrib
            selected_id = photo_id
            break

        if selected_attrib is None:
            # every remaining search result was rejected; nothing left to try
            break

        try:
            url = _getURL(selected_attrib)
        except Exception:
            print('The None type url error again, die gracefully')
            continue

        # score only the pictures that will actually be queued
        initial_score = computePictureScore(selected_id, mood)
        added_picture_list.append((selected_id, url, mood, initial_score))

    sorted_picture_list = sorted(added_picture_list,
                                 key=lambda entry: entry[3],
                                 reverse=True)

    # add till database is full
    preadd = min(add_num, len(sorted_picture_list))
    for _ in range(preadd):
        _store(sorted_picture_list.pop(0))

    # get media data
    medias = Media.objects.filter(moods=mood, content_type=Media.PICTURE)
    ScoreFilter._calculateFinalScore(medias)

    # Restrict to pictures: the entries below are destroyed as Media.PICTURE.
    # Materialised once so the indices keep pointing at the original
    # lowest-first ordering while rows are destroyed and added.
    mediaData = list(Media.objects.filter(
        moods=mood,
        content_type=Media.PICTURE,
        score__final_score__gt=-1).order_by('score__final_score'))

    if len(mediaData) == 0:
        return preadd

    # Add to database if score is higher than existing scores
    lowestMediaIndex = 0
    while sorted_picture_list:
        entry = sorted_picture_list.pop(0)
        if entry[3] > mediaData[lowestMediaIndex].score.final_score:
            destory(mediaData[lowestMediaIndex].id, Media.PICTURE)
            _store(entry)

            lowestMediaIndex += 1
            if lowestMediaIndex >= len(mediaData):
                break
        else:
            # candidates are sorted best first, so no later one can win either
            break

    return preadd + lowestMediaIndex