Ejemplo n.º 1
0
def importMapFiles(BadFileList, GoodFileList, IDmap):
    """Load every listed map file and convert it with importMapFile.

    Each entry of both lists is loaded with ``utility.loadObject`` and the
    loaded object is passed, together with ``IDmap``, to ``importMapFile``.
    ``len(IDmap)`` is printed after every file (presumably as a progress
    indicator, on the assumption that importMapFile grows IDmap -- confirm).

    Args:
        BadFileList: iterable of entries whose documents go into ``bad_docs``.
        GoodFileList: iterable of entries whose documents go into
            ``good_docs``.
        IDmap: object forwarded to every ``importMapFile`` call; must support
            ``len()``.

    Returns:
        A ``(good_docs, bad_docs)`` tuple of lists, each in input order.
    """
    bad_docs = []
    for badFile in BadFileList:
        bad_docs.append(importMapFile(utility.loadObject(badFile), IDmap))
        print(len(IDmap))

    good_docs = []
    for goodFile in GoodFileList:
        # Load the good file itself.  The earlier code loaded ``badFile``
        # here, so every good document was built from the last bad file
        # (or a NameError was raised when BadFileList was empty).
        good_docs.append(importMapFile(utility.loadObject(goodFile), IDmap))
        print(len(IDmap))

    return good_docs, bad_docs
Ejemplo n.º 2
0
def importMapFiles(BadFileList, GoodFileList, IDmap):
    """Convert the bad and the good map files into two lists of documents.

    Every entry is loaded through ``utility.loadObject``; the loaded object
    and ``IDmap`` are then given to ``importMapFile``.  After each file the
    current ``len(IDmap)`` is printed.

    Args:
        BadFileList: iterable of entries to load for ``bad_docs``.
        GoodFileList: iterable of entries to load for ``good_docs``.
        IDmap: object passed through to ``importMapFile``; only its length
            is read directly in this function.

    Returns:
        ``(good_docs, bad_docs)`` -- note the good list comes first.
    """
    bad_docs = []
    for badFile in BadFileList:
        mapDict = utility.loadObject(badFile)
        bad_docs.append(importMapFile(mapDict, IDmap))
        print(len(IDmap))

    good_docs = []
    for goodFile in GoodFileList:
        # Fixed: this loop used to load ``badFile`` (the leftover variable
        # of the loop above) instead of the good file being iterated.
        mapDict = utility.loadObject(goodFile)
        good_docs.append(importMapFile(mapDict, IDmap))
        print(len(IDmap))

    return good_docs, bad_docs
Ejemplo n.º 3
0
def importGoodReport(sGoodDirectory, dictCurrent, iBadRun):
    """Feed every 'map.obj' report located for sGoodDirectory to addGoodSample.

    The report list is filled in by ``utility.findDesiredFiles`` (presumably
    with file paths -- confirm against that helper) and its length is printed
    with a 'good:' prefix.  Each entry is loaded with ``utility.loadObject``
    and passed to ``addGoodSample`` together with ``dictCurrent``.

    Args:
        sGoodDirectory: first argument given to ``utility.findDesiredFiles``.
        dictCurrent: object forwarded to every ``addGoodSample`` call.
        iBadRun: accepted for call compatibility; not used by this function.

    Returns:
        The number of reports processed.  The run counter used to be
        computed and then dropped (the function returned None); it is now
        returned, so callers that ignore the result are unaffected.
    """
    listGoodReport = []
    utility.findDesiredFiles(sGoodDirectory, listGoodReport, 'map.obj')
    print('good: %d' % len(listGoodReport))

    iGoodRun = 0
    for iGoodRun, sFile in enumerate(listGoodReport, 1):
        addGoodSample(utility.loadObject(sFile), dictCurrent)
    return iGoodRun
Ejemplo n.º 4
0
def importGoodReport(sGoodDirectory, dictCurrent, iBadRun):
    """Load all 'map.obj' reports for sGoodDirectory into dictCurrent.

    ``utility.findDesiredFiles`` populates the report list (presumably with
    paths -- verify against the helper); the number found is printed as
    ``good: N``.  Every report is loaded via ``utility.loadObject`` and
    handed to ``addGoodSample`` along with ``dictCurrent``.

    Args:
        sGoodDirectory: location passed to ``utility.findDesiredFiles``.
        dictCurrent: accumulator passed to ``addGoodSample``.
        iBadRun: unused here; kept so existing callers keep working.

    Returns:
        How many reports were processed.  Earlier versions counted the runs
        but returned None; returning the count is backward-compatible for
        callers that discard the result.
    """
    listGoodReport = []
    utility.findDesiredFiles(sGoodDirectory, listGoodReport, 'map.obj')
    print('good: %d' % len(listGoodReport))

    iGoodRun = 0
    for sFile in listGoodReport:
        addGoodSample(utility.loadObject(sFile), dictCurrent)
        iGoodRun += 1
    return iGoodRun
Ejemplo n.º 5
0
def importBadReport(sBadDirectory, dictCurrent, setCallSite):
    """Feed every 'map.obj' report located for sBadDirectory to addBadSample.

    The report list is filled in by utility.findDesiredFiles (presumably with
    file paths -- confirm against that helper); its length is printed with a
    'bad:' prefix.  Each entry is loaded with utility.loadObject and handed
    to addBadSample together with dictCurrent and setCallSite.

    Returns the number of reports processed.
    """
    report_paths = []
    utility.findDesiredFiles(sBadDirectory, report_paths, 'map.obj')
    print('%s %d' % ('bad:', len(report_paths)))

    processed = 0
    for processed, path in enumerate(report_paths, 1):
        addBadSample(utility.loadObject(path), dictCurrent, setCallSite)
    return processed
Ejemplo n.º 6
0
def importBadReport(sBadDirectory, dictCurrent, setCallSite):
    """Process all 'map.obj' reports for sBadDirectory and count them.

    utility.findDesiredFiles populates the list of reports (assumed to be
    paths -- verify in the helper).  The count found is printed as
    ``bad: N``; every report is then loaded through utility.loadObject and
    passed to addBadSample with dictCurrent and setCallSite.

    Returns the number of reports that were processed.
    """
    found = []
    utility.findDesiredFiles(sBadDirectory, found, 'map.obj')
    print('%s %d' % ('bad:', len(found)))

    run_count = 0
    for report in found:
        loaded = utility.loadObject(report)
        addBadSample(loaded, dictCurrent, setCallSite)
        run_count += 1
    return run_count
Ejemplo n.º 7
0
def importBadReport(sBadDirectory, dictCurrent, mapInstruction, mapFunction):
    """Feed every 'map.obj' report located for sBadDirectory to addBadSample.

    The report list is filled in by utility.findDesiredFiles (presumably with
    file paths -- confirm against that helper); its length is printed with a
    'bad:' prefix.  Each entry is loaded with utility.loadObject and handed
    to addBadSample together with dictCurrent, mapInstruction and
    mapFunction.

    Returns the number of reports processed.
    """
    report_paths = []
    utility.findDesiredFiles(sBadDirectory, report_paths, 'map.obj')
    print('%s %d' % ('bad:', len(report_paths)))

    processed = 0
    for processed, path in enumerate(report_paths, 1):
        addBadSample(utility.loadObject(path), dictCurrent,
                     mapInstruction, mapFunction)
    return processed
Ejemplo n.º 8
0
def importBadReport(sBadDirectory, dictCurrent, mapInstruction, mapFunction ):
    """Process all 'map.obj' reports for sBadDirectory and count them.

    utility.findDesiredFiles populates the list of reports (assumed to be
    paths -- verify in the helper).  The count found is printed as
    ``bad: N``; every report is then loaded through utility.loadObject and
    passed to addBadSample with dictCurrent, mapInstruction and mapFunction.

    Returns the number of reports that were processed.
    """
    found = []
    utility.findDesiredFiles(sBadDirectory, found, 'map.obj')
    print('%s %d' % ('bad:', len(found)))

    run_count = 0
    for report in found:
        loaded = utility.loadObject(report)
        addBadSample(loaded, dictCurrent, mapInstruction, mapFunction)
        run_count += 1
    return run_count
Ejemplo n.º 9
0
import string
import re
import os
import sys
import commands
import pickle
import glob
import math
import utility
import gc
from sets import Set



if __name__ == '__main__':
    # Sums the first element of every 'sample.count.obj' report found for
    # the directory given as the single command-line argument, then prints
    # "<number of reports> <total> <average per report>".
    if len(sys.argv) < 2:
        # Explicit usage message instead of an IndexError traceback.
        sys.exit('usage: %s <report-directory>' % sys.argv[0])

    sReportDirectory = sys.argv[1]
    listReport = []
    utility.findDesiredFiles(sReportDirectory, listReport, 'sample.count.obj')

    iCount = 0
    for report in listReport:
        print(report)
        sample = utility.loadObject(report)
        print(sample)
        # Only element 0 of the loaded object is accumulated.
        iCount += sample[0]

    if not listReport:
        # Without this guard the average below divides by zero.
        sys.exit('no sample.count.obj reports found for %s' % sReportDirectory)

    print('%s %s %s' % (len(listReport), iCount,
                        iCount * 1.0 / len(listReport)))
Ejemplo n.º 10
0
def print_rank(finalResult, badDict):
    """Print the (at most) 100 highest-valued entries of finalResult.

    Entries are ordered by value, largest first.  Each key is split on '_';
    the line printed holds the first two pieces, the badDict entry stored
    under the first piece parsed as a hexadecimal integer, and the value.
    """
    ranked = sorted(finalResult.iteritems(),
                    key=lambda pair: pair[1],
                    reverse=True)
    for name, score in ranked[:100]:
        pieces = name.split('_')
        print('%s %s %s %s' % (pieces[0], pieces[1],
                               badDict[int(pieces[0], 16)], score))


if __name__ == '__main__':
    # Command line: <pickled call-site collection> <bad dir> <good dir>
    setCallSite = utility.loadObject(sys.argv[1])
    sBadDirectory, sGoodDirectory = sys.argv[2], sys.argv[3]

    # Both importers fill the same dictionary.
    dictCurrent = {}
    iBadRun = importBadReport(sBadDirectory, dictCurrent, setCallSite)
    importGoodReport(sGoodDirectory, dictCurrent, iBadRun)

    # Tally the positive entries among the first six slots of every record.
    # The tally is not printed anywhere in this block.
    count = 0
    for key in dictCurrent:
        counters = dictCurrent[key]
        count += len([num for num in range(0, 6) if counters[num] > 0])

    # NOTE(review): 6 looks like the number of slots per call site -- confirm.
    print('%s %d' % ('total predicate:', len(setCallSite) * 6))
    print(len(dictCurrent) * 6)
Ejemplo n.º 11
0
        dictTmp = utility.loadObject(sFile)
        addGoodSample( dictTmp, dictCurrent )
        iGoodRun = iGoodRun + 1

def print_rank(finalResult, badDict):
    """Print up to 100 entries of finalResult, highest value first.

    A key is split on '_'.  The output line consists of the first two
    pieces, the badDict value looked up under int(first piece, 16), and the
    entry's value, separated by single spaces.
    """
    by_value = sorted(finalResult.iteritems(),
                      key=lambda item: item[1],
                      reverse=True)
    for rank, (key, value) in enumerate(by_value):
        if rank == 100:
            break
        head, second = key.split('_')[:2]
        print('%s %s %s %s' % (head, second, badDict[int(head, 16)], value))

if __name__ == '__main__':
    # Arguments: 1 = pickled call-site collection, 2 = bad-run directory,
    # 3 = good-run directory.
    setCallSite = utility.loadObject(sys.argv[1])
    sBadDirectory, sGoodDirectory = sys.argv[2], sys.argv[3]

    # The bad and the good importer both write into dictCurrent.
    dictCurrent = {}
    iBadRun = importBadReport(sBadDirectory, dictCurrent, setCallSite)
    importGoodReport(sGoodDirectory, dictCurrent, iBadRun)

    # Count positive values in slots 0..5 of each record.  The result is
    # not reported by this block.
    count = 0
    for key in dictCurrent:
        counters = dictCurrent[key]
        count += len([num for num in range(0, 6) if counters[num] > 0])

    # NOTE(review): the factor 6 presumably matches the six slots per
    # record counted above -- confirm.
    print('%s %d' % ('total predicate:', len(setCallSite) * 6))
    print(len(dictCurrent) * 6)
    def __init__(self, videoPath):
        """Describe every frame found in videoPath and select key frames.

        Pipeline, in order:
          1. Run ``sift.siftFeature`` on every entry of ``videoPath``.
          2. Build one histogram per frame with ``self.buildHistogram`` over
             the vocabulary loaded from "data/voc.pkl".
          3. Cluster the histograms with k-means, one centroid per ten
             frames (at least one), and assign frames to centroids with
             ``vq``.
          4. Run ``pca.pca`` on all SIFT features stacked together and keep
             the first returned value in ``self.V``.
          5. For each cluster, ask ``self.identifyKeyFrame`` for key frames.
          6. Collect the histograms and names of the key frames.

        Attributes set: ``videoPath``, ``imageNames``, ``imageHistograms``,
        ``SIFTfeatures``, ``numOfFrames``, ``numOfCentriods``, ``V``,
        ``keyFrames``, ``compressedHistogram``, ``compressedImageName``.

        Args:
            videoPath: directory whose entries are each passed to
                ``sift.siftFeature`` (presumably one image per video frame --
                confirm with the caller).
        """
        self.videoPath = videoPath

        # Read in video frames.  os.listdir order is kept as-is because the
        # frame indices used below refer to positions in these two lists.
        SIFTfeatures = []
        imageNames = []  # name of each image
        for item in os.listdir(videoPath):
            _, features = sift.siftFeature(videoPath + "/" + item)
            SIFTfeatures.append(features)
            imageNames.append(item)

        # Histogramize each image
        vocabulary = util.loadObject("data/voc.pkl")
        imageHistograms = np.array([
            self.buildHistogram(util.normalizeSIFT(feature), vocabulary)
            for feature in SIFTfeatures
        ])

        self.imageNames = imageNames
        self.imageHistograms = imageHistograms
        self.SIFTfeatures = SIFTfeatures

        # Cluster frames.  Fewer than ten frames used to yield zero
        # centroids, which k-means cannot fit; always request at least one.
        self.numOfFrames = len(imageNames)
        self.numOfCentriods = max(1, self.numOfFrames // 10)

        kmeans = KMeans(init="k-means++", n_clusters=self.numOfCentriods, n_init=10)
        kmeans.fit(self.imageHistograms)

        # Get the members of each cluster: centroid code -> frame indices.
        # Keys stay str(code), as before.  setdefault replaces the former
        # per-frame rebuild of the key list, and the mapping no longer
        # shadows the builtin ``dict``.
        codes, _ = vq(self.imageHistograms, kmeans.cluster_centers_)
        clusters = {}
        for index, code in enumerate(codes):
            clusters.setdefault(str(code), []).append(index)

        # Stack all SIFT features to perform PCA.  A single vstack call
        # avoids re-copying the growing array once per frame.
        V, S, mean = pca.pca(np.vstack(SIFTfeatures))
        self.V = V

        # Perform near-duplicate detection within each cluster
        KEYFRAMES = []
        for key in clusters:
            cluster = clusters[key]
            clusterFeatures = [SIFTfeatures[i] for i in cluster]
            potentialKeyFrames = self.identifyKeyFrame(clusterFeatures, cluster)
            KEYFRAMES += potentialKeyFrames
            print(str(cluster) + ": " + str(potentialKeyFrames))

        self.keyFrames = KEYFRAMES

        # Gather histogram and name of every key frame.  KEYFRAMES entries
        # are used as indices into the per-frame lists (assumed to be ints
        # returned by identifyKeyFrame -- confirm).  With exactly one key
        # frame the histogram is left as that single indexed row, unchanged
        # from the previous behaviour.
        compressedHistogram = self.imageHistograms[KEYFRAMES[0]]
        compressedImageName = [self.imageNames[KEYFRAMES[0]]]
        for keyframe in KEYFRAMES[1:]:
            compressedHistogram = np.vstack(
                (compressedHistogram, self.imageHistograms[keyframe]))
            compressedImageName.append(self.imageNames[keyframe])

        self.compressedHistogram = compressedHistogram
        self.compressedImageName = compressedImageName