Exemple #1
0
def _cifar10_batch_gists(batch_file):
    """Return ``{b'gists': [...], b'labels': ...}`` for one pickled batch.

    The batch is read with ``unpickle``; each row of ``b'data'`` is rebuilt
    as an RGB PIL image and passed to ``leargist.color_gist``.
    """
    dic = unpickle(batch_file)
    # Each row becomes three 32x32 planes (one per colour channel).
    pic = dic[b'data'].reshape(10000, 3, 32, 32)
    gist = []
    for img in pic:
        # Create one image object per plane, then merge the planes into RGB.
        planes = (Image.fromarray(img[0]),
                  Image.fromarray(img[1]),
                  Image.fromarray(img[2]))
        gist.append(leargist.color_gist(Image.merge("RGB", planes)))
    return {b'gists': gist, b'labels': dic[b'labels']}


def _pickle_to_file(obj, path):
    """Pickle ``obj`` to ``path``, closing the file even if dumping fails."""
    with open(path, 'wb') as fp:
        pickle.dump(obj, fp)


def load_cifar10():
    """Precompute GIST descriptors for the test batch and data batches 1-5.

    Reads ``test_batch`` and ``data_batch_1`` .. ``data_batch_5`` from the
    current working directory and writes ``gists_test`` and
    ``gists_batch_1`` .. ``gists_batch_5`` next to them. Each output is a
    pickled dict with keys ``b'gists'`` and ``b'labels'``. Returns None.
    """
    newdic = _cifar10_batch_gists('test_batch')
    print("测试计算完毕")
    _pickle_to_file(newdic, 'gists_test')

    for i in range(1, 6):
        newdic = _cifar10_batch_gists('data_batch_' + str(i))
        print("第%d轮计算完毕" % i)
        _pickle_to_file(newdic, 'gists_batch_' + str(i))
def main():
    """Compute and save GIST vectors for submission images and their queries.

    Walks ``DATA_DIR``. In every directory holding more than one file, the
    first file in sorted order is treated as the submission and the remaining
    files as queries. Directories whose first file name contains more than
    one ``_`` are skipped in both passes.

    Outputs under ``INPUT_DIR``: ``X.npy`` plus the pickled ``submissions``
    list, and ``Q.npy`` plus the pickled ``queries`` list. The submission
    pass is skipped when ``X.npy`` already exists; when ``Q.npy`` exists the
    process exits through ``sys.exit()``.
    """
    PROJECT_DIR = os.path.abspath(os.path.dirname(__file__))
    DATA_DIR = "/mnt/data2/photoshopbattle_images_samples"
    INPUT_DIR = os.path.join(PROJECT_DIR, '../data')

    if os.path.exists(os.path.join(INPUT_DIR, 'X.npy')):
        print("Input gist vectors already exist, skipping...")
    else:
        X = []
        submissionList = []
        for dirName, _, fileList in os.walk(DATA_DIR):
            if len(fileList) <= 1:
                continue
            submission = sorted(fileList)[0]
            # if no parent, skip
            if submission.count("_") > 1:
                print("Skipping {} with files {}".format(dirName, fileList))
                continue
            fullPath = os.path.join(dirName, submission)
            print("Computing gist vector for {}...".format(fullPath))
            try:
                im = Image.open(fullPath)
                X.append(leargist.color_gist(im, orientations=(4, 4, 2)))
                submissionList.append(submission.rsplit('.', 1)[0])
            except Exception:
                # Best effort: an unreadable image is reported and skipped.
                print("Unable to open image {}".format(fullPath))
                continue
            if len(submissionList) % 500 == 0:
                print("Computed gist vectors for {} images".format(
                    len(submissionList)))
                print("=" * 50)
        X = np.array(X)

        print("Saving input gist vectors and submission list")
        np.save(os.path.join(INPUT_DIR, 'X.npy'), X)
        with open(os.path.join(INPUT_DIR, 'submissions'), 'wb') as fileOut:
            pickle.dump(submissionList, fileOut)

    if os.path.exists(os.path.join(INPUT_DIR, 'Q.npy')):
        print("Query gist vectors already exist, exiting...")
        sys.exit()

    Q = []
    queryList = []
    print("=" * 50)
    for dirName, _, fileList in os.walk(DATA_DIR):
        if len(fileList) <= 1:
            continue
        sortedFiles = sorted(fileList)
        # Re-derive the skip rule here instead of relying on state collected
        # by the submission pass: that pass does not run when X.npy is
        # already cached, which used to let parent-less directories through.
        if sortedFiles[0].count("_") > 1:
            continue
        print("Computing gist vectors for query images...")
        for query in sortedFiles[1:]:
            fullPath = os.path.join(dirName, query)
            print("Computing gist vector for query {}...".format(fullPath))
            try:
                im = Image.open(fullPath)
                Q.append(leargist.color_gist(im, orientations=(4, 4, 2)))
                queryList.append(query)
            except Exception:
                print("Unable to open image {}".format(fullPath))
    Q = np.array(Q)

    print("Saving query gist vectors and query list")
    np.save(os.path.join(INPUT_DIR, 'Q.npy'), Q)
    with open(os.path.join(INPUT_DIR, 'queries'), 'wb') as fileOut:
        pickle.dump(queryList, fileOut)
Exemple #3
0
def main():
    """Compute GIST vectors with children as candidates and parents as queries.

    Walks ``DATA_DIR``. In each directory with more than one file, the first
    file in sorted order is the parent and the rest are its children.
    Directories whose first file name contains more than one ``_`` are
    skipped. Children go into ``X-cp.npy`` / ``candidates-cp`` and parents
    into ``Q-cp.npy`` / ``queries-cp`` under ``INPUT_DIR``.

    Nothing is computed if either ``X-cp.npy`` or ``Q-cp.npy`` already exists.
    """
    PROJECT_DIR = os.path.abspath(os.path.dirname(__file__))
    DATA_DIR = "/mnt/data2/photoshopbattle_images_samples"
    INPUT_DIR = os.path.join(PROJECT_DIR, '../data')

    if os.path.exists(os.path.join(INPUT_DIR, 'X-cp.npy')) or os.path.exists(
            os.path.join(INPUT_DIR, 'Q-cp.npy')):
        print("Input/query gist vectors already exist, skipping...")
        return

    X = []
    Q = []
    candidateList = []
    queryList = []
    for dirName, _, fileList in os.walk(DATA_DIR):
        if len(fileList) <= 1:
            continue
        sortedFileList = sorted(fileList)
        parent = sortedFileList[0]
        # if no parent, skip
        if parent.count("_") > 1:
            print("Skipping {} with files {}".format(dirName, fileList))
            continue

        # children as candidates
        for child in sortedFileList[1:]:
            assert child.count("_") > 1
            childFullPath = os.path.join(dirName, child)
            print("Computing gist vector for child {}...".format(
                childFullPath))
            try:
                im = Image.open(childFullPath)
                X.append(leargist.color_gist(im, orientations=(4, 4, 2)))
                candidateList.append(child)
            except Exception:
                # Report the path that failed; the handler used to name an
                # undefined variable and raised NameError instead.
                print("Unable to open image {}!!!".format(childFullPath))

        # parent as query
        parentFullPath = os.path.join(dirName, parent)
        print("Computing gist vector for parent {}...".format(
            parentFullPath))
        try:
            im = Image.open(parentFullPath)
            Q.append(leargist.color_gist(im, orientations=(4, 4, 2)))
            queryList.append(parent)
        except Exception:
            print("Unable to open image {}".format(parentFullPath))

    X = np.array(X)
    print("Saving input gist vectors and submission list...")
    np.save(os.path.join(INPUT_DIR, 'X-cp.npy'), X)
    with open(os.path.join(INPUT_DIR, 'candidates-cp'), 'wb') as fileOut:
        pickle.dump(candidateList, fileOut)

    Q = np.array(Q)
    print("Saving query gist vectors and query list...")
    np.save(os.path.join(INPUT_DIR, 'Q-cp.npy'), Q)
    with open(os.path.join(INPUT_DIR, 'queries-cp'), 'wb') as fileOut:
        pickle.dump(queryList, fileOut)
def test_gist():
    flickr_test_set = np.loadtxt(caffe_root + 'data/flickr_style/test.txt', str, delimiter='\t')
    flickr_test_set_path = [readline.split()[0] for readline in flickr_test_set]
    flickr_test_set_label = [int(readline.split()[1]) for readline in flickr_test_set]

    n = 9
    img = caffe.io.load_image(flickr_test_set_path[n])
    print "true label", flickr_test_set_label[n]

    print leargist.color_gist(img)
    import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
Exemple #5
0
def _reddit_dir_gists(image_dir, progress_format):
    """Return ``(gist_matrix, file_names)`` for the readable images in a directory.

    ``progress_format`` is a ``str.format`` template that receives the full
    path of each file before it is processed. Files that fail to load or to
    produce a descriptor are reported and left out of both results.
    """
    vectors = []
    names = []
    for fileName in os.listdir(image_dir):
        fullPath = os.path.join(image_dir, fileName)
        print(progress_format.format(fullPath))
        try:
            im = Image.open(fullPath)
            vectors.append(leargist.color_gist(im, orientations=(4, 4, 2)))
            names.append(fileName)
        except Exception:
            # Best effort: keep going when a single image is unusable.
            print("Unable to open image {}".format(fullPath))
    return np.array(vectors), names


def main():
    """Compute and save GIST vectors for the reddit candidate and query sets.

    Candidates are read from ``CANDIDATE_DIR`` and stored as
    ``X-reddit.npy`` / ``submissions-reddit``; queries are read from
    ``QUERY_DIR`` and stored as ``Q-reddit.npy`` / ``queries-reddit``, all
    under ``INPUT_DIR``. The candidate pass is skipped when ``X-reddit.npy``
    exists; when ``Q-reddit.npy`` exists the process exits through
    ``sys.exit()``.
    """
    PROJECT_DIR = os.path.abspath(os.path.dirname(__file__))
    CANDIDATE_DIR = "/mnt/data/reddit_images_2016_06_preprocessed"
    QUERY_DIR = "/mnt/data/reddit_images_2016_06_preprocessed_sampled"
    INPUT_DIR = os.path.join(PROJECT_DIR, '../data')

    if os.path.exists(os.path.join(INPUT_DIR, 'X-reddit.npy')):
        print("Input gist vectors already exist, skipping...")
    else:
        X, submissions = _reddit_dir_gists(
            CANDIDATE_DIR, "Computing gist vector for {}")

        print("Saving input gist vectors and submission names")
        np.save(os.path.join(INPUT_DIR, 'X-reddit.npy'), X)
        with open(os.path.join(INPUT_DIR, 'submissions-reddit'),
                  'wb') as fileOut:
            pickle.dump(submissions, fileOut)

    if os.path.exists(os.path.join(INPUT_DIR, 'Q-reddit.npy')):
        print("Query gist vectors already exist, exiting...")
        sys.exit()

    print("=" * 50)
    Q, queries = _reddit_dir_gists(
        QUERY_DIR, "Computing gist vector for query {}...")

    print("Saving query gist vectors and query list")
    np.save(os.path.join(INPUT_DIR, 'Q-reddit.npy'), Q)
    with open(os.path.join(INPUT_DIR, 'queries-reddit'), 'wb') as fileOut:
        pickle.dump(queries, fileOut)
Exemple #6
0
def get_img2gist():
    """Return the image-name -> color GIST mapping, building it if needed.

    First tries to unpickle the mapping from ``name2gist_file``. If that
    fails for any reason, the mapping is rebuilt from ``train_file_map``
    (non-blank lines whose first two whitespace-separated fields are the
    image name and its path relative to ``train_images_dir``), pickled to
    ``name2gist_file`` and returned.
    """
    try:
        with open(name2gist_file, 'rb') as cache_file:
            print('loading existed img2gist...')
            sys.stdout.flush()
            return pickle.load(cache_file)
    except Exception:
        # No usable cache: fall through and rebuild it below.
        pass

    # First pass only counts entries so progress can be shown as count/total.
    with open(train_file_map, 'r') as listing:
        total_num = sum(1 for row in listing if row.strip())

    mapping = {}
    done = 0
    with open(train_file_map, 'r') as listing:
        for row in listing:
            fields = row.split()
            if not fields:
                continue
            done += 1
            name, rpath = fields[0], fields[1]
            im = crop_resize(
                Image.open(pjoin(train_images_dir, rpath)), normal_size, True)
            mapping[name] = leargist.color_gist(im)
            sys.stdout.write(
                '%d/%d\r size:(%d, %d)    ' % (done, total_num, im.size[0], im.size[1]))
            sys.stdout.flush()

    with open(name2gist_file, 'wb') as cache_file:
        pickle.dump(mapping, cache_file)
    return mapping
Exemple #7
0
    def build_index(self):
        """Build the binary-code index for the files in ``self.dir``, or load it.

        If ``self.is_index_built()`` is true the stored index is loaded with
        ``self.load_data()`` and nothing else happens. Otherwise a color GIST
        descriptor is computed for every regular file directly inside
        ``self.dir``, the descriptors are mean-centred and projected onto
        ``bit`` principal directions, ``utils.itq`` supplies the codes and
        rotation, and the codes are packed into one integer per image.

        Sets ``self.mean``, ``self.pca``, ``self.rotation`` and
        ``self.cache`` (a list of ``(int_code, file_name)`` pairs), then
        calls ``self.save_data()``.
        """
        if self.is_index_built():
            # index has been built, just load it
            self.load_data()
            return

        feature_num = 960
        bit = 64
        names = []
        # Seed with an empty (0, feature_num) float32 block so the result has
        # the same shape and dtype promotion as stacking row by row, but the
        # matrix is copied once instead of once per image.
        rows = [np.ndarray(shape=(0, feature_num), dtype=np.float32)]
        for name in os.listdir(self.dir):
            file_path = os.path.join(self.dir, name)
            # don't recursively search file
            if os.path.isfile(file_path):
                im = Image.open(file_path)
                try:
                    rows.append(leargist.color_gist(im))
                finally:
                    im.close()
                names.append(name)
        # store the original feature matrix
        data = np.vstack(rows)
        # apply PCA
        self.mean = data.mean(axis=0)
        data -= self.mean
        u, s, v = np.linalg.svd(np.cov(np.transpose(data)))
        self.pca = u[:, :bit]
        data = data.dot(self.pca)
        # use ITQ to get the best rotation
        codes, self.rotation = utils.itq(data)
        # convert 0-1 codes matrix  to integer
        int_codes = np.zeros(shape=(codes.shape[0], 1), dtype=np.uint64)
        for i in range(codes.shape[1]):
            # NOTE(review): `2 << i` gives column i the weight 2**(i + 1); for
            # i == 63 that is 2**64, which does not fit in uint64. Left as is
            # because whatever encodes query codes must use the same weights;
            # change both sides together.
            col = codes[:, i].astype(np.uint64).reshape(codes.shape[0],
                                                        1) * (2 << i)
            int_codes = int_codes + col
        # Materialise the pairs: on Python 3 a bare zip() is a one-shot
        # iterator and would be empty after its first traversal.
        self.cache = list(zip(int_codes.flatten().tolist(), names))
        self.save_data()
Exemple #8
0
def gist(image_ids, image_filenames, max_size=256):
    """Compute color GIST features for the images that can be processed.

    :param image_ids: identifiers, parallel to ``image_filenames``
    :param image_filenames: file names passed to
        ``vislab.dataset.get_image_for_filename``
    :param max_size: bound handed to ``Image.thumbnail`` for both dimensions
    :return: ``(good_image_ids, feats)`` - the ids of the images that were
        processed successfully and their features, in matching order.
        Images that raise at any step are silently left out.
    """
    import leargist

    good_image_ids = []
    feats = []
    for image_id, filename in zip(image_ids, image_filenames):
        try:
            # TODO: resize image to a smaller size? like 128?
            img = vislab.dataset.get_image_for_filename(filename)
            assert(img.dtype == np.uint8)

            # Expand a 2-D (single channel) image to three identical channels.
            if img.ndim == 2:
                img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
            h, w = img.shape[:2]

            mode = 'RGBA'
            rimg = img.reshape(img.shape[0] * img.shape[1], img.shape[2])
            # Three-channel pixels get a fully opaque alpha column appended.
            if len(rimg[0]) == 3:
                rimg = np.c_[rimg, 255 * np.ones((len(rimg), 1), np.uint8)]

            im = Image.frombuffer(
                mode, (w, h), rimg.tobytes(), 'raw', mode, 0, 1)
            im.thumbnail((max_size, max_size), Image.ANTIALIAS)
            feat = leargist.color_gist(im)
        except Exception:
            # Deliberate best effort: skip images that cannot be processed.
            continue
        good_image_ids.append(image_id)
        feats.append(feat)
    # Return only the ids that have a feature; returning the full input list
    # left ids and feats misaligned as soon as one image failed.
    return good_image_ids, feats
Exemple #9
0
    def build_index(self):
        """Build the binary-code index for the files in ``self.dir``, or load it.

        If ``self.is_index_built()`` is true the stored index is loaded with
        ``self.load_data()`` and nothing else happens. Otherwise a color GIST
        descriptor is computed for every regular file directly inside
        ``self.dir``, the descriptors are mean-centred and projected onto
        ``bit`` principal directions, ``utils.itq`` supplies the codes and
        rotation, and the codes are packed into one integer per image.

        Sets ``self.mean``, ``self.pca``, ``self.rotation`` and
        ``self.cache`` (a list of ``(int_code, file_name)`` pairs), then
        calls ``self.save_data()``.
        """
        if self.is_index_built():
            # index has been built, just load it
            self.load_data()
            return

        feature_num = 960
        bit = 64
        names = []
        # Seed with an empty (0, feature_num) float32 block so the result has
        # the same shape and dtype promotion as stacking row by row, but the
        # matrix is copied once instead of once per image.
        rows = [np.ndarray(shape = (0, feature_num), dtype = np.float32)]
        for name in os.listdir(self.dir):
            file_path = os.path.join(self.dir, name)
            # don't recursively search file
            if os.path.isfile(file_path):
                im = Image.open(file_path)
                try:
                    rows.append(leargist.color_gist(im))
                finally:
                    im.close()
                names.append(name)
        # store the original feature matrix
        data = np.vstack(rows)
        # apply PCA
        self.mean = data.mean(axis = 0)
        data -= self.mean
        u, s, v = np.linalg.svd(np.cov(np.transpose(data)))
        self.pca = u[:, :bit]
        data = data.dot(self.pca)
        # use ITQ to get the best rotation
        codes, self.rotation = utils.itq(data)
        # convert 0-1 codes matrix  to integer
        int_codes = np.zeros(shape = (codes.shape[0], 1), dtype = np.uint64)
        for i in range(codes.shape[1]):
            # NOTE(review): `2 << i` gives column i the weight 2**(i + 1); for
            # i == 63 that is 2**64, which does not fit in uint64. Left as is
            # because whatever encodes query codes must use the same weights;
            # change both sides together.
            col = codes[:, i].astype(np.uint64).reshape(codes.shape[0], 1) * (2 << i)
            int_codes = int_codes + col
        # Materialise the pairs: on Python 3 a bare zip() is a one-shot
        # iterator and would be empty after its first traversal.
        self.cache = list(zip(int_codes.flatten().tolist(), names))
        self.save_data()
Exemple #10
0
 def extract_gist_feature(self, img_path):
     """Return the flattened color GIST descriptor of the image at ``img_path``.

     The image is first resized to ``cfg.RETRIEVAL_RESOLUTION``, whose two
     entries are passed to ``Image.resize`` in swapped order (index 1 first,
     then index 0).
     """
     import leargist
     res = cfg.RETRIEVAL_RESOLUTION
     resized = Image.open(img_path).resize((res[1], res[0]))
     descriptor = leargist.color_gist(resized)
     return descriptor.flatten()
Exemple #11
0
def gist(image_ids, image_filenames, max_size=256):
    """Compute color GIST features for the images that can be processed.

    :param image_ids: identifiers, parallel to ``image_filenames``
    :param image_filenames: file names passed to
        ``vislab.dataset.get_image_for_filename``
    :param max_size: bound handed to ``Image.thumbnail`` for both dimensions
    :return: ``(good_image_ids, feats)`` - the ids of the images that were
        processed successfully and their features, in matching order.
        Images that raise at any step are silently left out.
    """
    import leargist

    good_image_ids = []
    feats = []
    for image_id, filename in zip(image_ids, image_filenames):
        try:
            # TODO: resize image to a smaller size? like 128?
            img = vislab.dataset.get_image_for_filename(filename)
            assert (img.dtype == np.uint8)

            # Expand a 2-D (single channel) image to three identical channels.
            if img.ndim == 2:
                img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
            h, w = img.shape[:2]

            mode = 'RGBA'
            rimg = img.reshape(img.shape[0] * img.shape[1], img.shape[2])
            # Three-channel pixels get a fully opaque alpha column appended.
            if len(rimg[0]) == 3:
                rimg = np.c_[rimg, 255 * np.ones((len(rimg), 1), np.uint8)]

            im = Image.frombuffer(mode, (w, h), rimg.tobytes(), 'raw', mode,
                                  0, 1)
            im.thumbnail((max_size, max_size), Image.ANTIALIAS)
            feat = leargist.color_gist(im)
        except Exception:
            # Deliberate best effort: skip images that cannot be processed.
            continue
        good_image_ids.append(image_id)
        feats.append(feat)
    # Return only the ids that have a feature; returning the full input list
    # left ids and feats misaligned as soon as one image failed.
    return good_image_ids, feats
Exemple #12
0
    def get(self, image):
        """
        Compute the GIST descriptor of an image.

        :param image: object exposing ``get_pil_object()``
        :return: whatever ``leargist.color_gist`` returns for that PIL object
                 (NOTE(review): presumably a flat descriptor vector, not
                 histogram tuples - confirm against callers)
        """
        pil_image = image.get_pil_object()
        return leargist.color_gist(pil_image)
Exemple #13
0
def gist(img):
    """Return the color GIST descriptor of an OpenCV BGR image.

    Side effect: installs a global "ignore" warnings filter on every call.
    """
    warnings.filterwarnings("ignore")
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The second positional argument is 1. According to the original author's
    # note this makes the descriptor 60 values long instead of 960.
    # NOTE(review): the second parameter is presumably pyleargist's `nblocks` - confirm.
    return leargist.color_gist(Image.fromarray(rgb), 1)
Exemple #14
0
    def gist_data(self):
        """
        Algorithm discussed on http://sarvamblog.blogspot.com/2014/08/supervised-classification-with-k-fold.html

        The raw bytes in ``self.rawdata`` are laid out as a 256-column
        grayscale image (trailing bytes that do not fill a row are dropped),
        written to a temporary PNG named after ``self.sha256``, resized to
        64x64 and passed to ``leargist.color_gist``.

        :return:  The GIST image data as a Pandas Series (first 320 values of
                  the descriptor), named ``self.sha256``.
        """
        width = 256
        ln = len(self.rawdata)
        rem = ln % width
        a = array.array("B")
        # array.fromstring was removed in Python 3.9; frombytes is its
        # replacement but does not exist on Python 2.
        load_bytes = a.frombytes if hasattr(a, "frombytes") else a.fromstring
        load_bytes(self.rawdata[0:ln - rem])
        g = np.reshape(a, (len(a) // width, width))
        g = np.uint8(g)
        # Public API instead of the private tempfile._get_default_tempdir().
        filename = os.path.join(tempfile.gettempdir(),
                                '{0}.png'.format(self.sha256))
        try:
            scipy.misc.imsave(filename, g)
            im = Image.open(filename)
            try:
                im1 = im.resize((64, 64), Image.ANTIALIAS)
            finally:
                im.close()
            des = leargist.color_gist(im1)
        finally:
            # Remove the temporary image even when a step above fails.
            if os.path.exists(filename):
                os.remove(filename)
        X = pd.Series(des[0:320])
        X.name = self.sha256
        return X
Exemple #15
0
def display_feature(num_of_pictures=20):
    """Show input image, HOG visualisation and GIST output side by side.

    Looks for ``training_data/1.png`` .. ``training_data/20000.png`` and
    opens one three-panel matplotlib figure per existing file, stopping once
    ``num_of_pictures`` figures have been shown.
    """
    shown = 0
    for index in xrange(20000):
        if shown >= num_of_pictures:
            return
        name = "training_data/" + str(index + 1) + ".png"
        if not os.path.isfile(name):
            continue

        # Flag 0 is passed to cv2.imread as in the original call.
        img = cv2.imread(name, 0)
        hog, hog_image = ft.hog(img, visualise=True)
        gist = leargist.color_gist(Image.open(name))

        fig, axes = plt.subplots(1, 3, figsize=(8, 4), sharex=True, sharey=True)
        # hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02))
        panels = (
            (img, 'Input image'),
            (hog_image, 'Histogram of Oriented Gradients'),
            (gist, 'GIST'),
        )
        for ax, (picture, title) in zip(axes, panels):
            ax.axis('off')
            ax.imshow(picture, cmap=plt.cm.gray)
            ax.set_title(title)
            ax.set_adjustable('box-forced')
        plt.show()
        shown += 1
Exemple #16
0
def process(test_image):
    """Rank the stored GIST descriptors in ``runs`` against ``test_image``.

    ``runs`` maps a start offset to a sequence of ``(packed, type)`` pairs,
    where ``packed`` unpacks to 960 floats. Every stored descriptor is
    compared to the query descriptor by Euclidean norm.

    :return: ``(result, breakdown)`` where ``result`` holds
        ``(image, gist, type)`` for the 10 nearest entries and ``breakdown``
        is a sorted list of ``(weight, type)`` pairs; the weights come from
        the 100 nearest entries, each contributing ``1 / (1 + distance)``,
        normalised to sum to 1.
    """
    query_gist = leargist.color_gist(test_image)

    index = []
    for start, items in runs.items():
        for i, (packed, label) in enumerate(items):
            stored = numpy.array(struct.unpack("f" * 960, packed), dtype=float)
            distance = numpy.linalg.norm(stored - query_gist)
            index.append((distance, i, label, start))
    index.sort()

    # Distance-weighted vote per type over the 100 closest entries.
    types = {}
    for distance, _, label, _ in index[:100]:
        types[label] = types.get(label, 0) + 1 / (1 + distance)

    total = sum(types.values())
    for label in types:
        types[label] /= total

    result = []
    for _, i, label, start in index[:10]:
        filename = "/tiny/tinyimages/subset/%s/%s.png" % (label, i + start)
        image = Image.open(filename)
        packed = runs[start][i][0]
        result.append((image, struct.unpack("f" * 960, packed), label))

    return (result, sorted([(b, a) for (a, b) in types.items()]))
Exemple #17
0
 def extract_gist_feature(self, img_path):
     """Return the flattened color GIST descriptor of the image at ``img_path``.

     The image is first resized to ``cfg.RETRIEVAL_RESOLUTION``, whose two
     entries are passed to ``Image.resize`` in swapped order (index 1 first,
     then index 0).
     """
     import leargist
     res = cfg.RETRIEVAL_RESOLUTION
     resized = Image.open(img_path).resize((res[1], res[0]))
     descriptor = leargist.color_gist(resized)
     return descriptor.flatten()
def compute_gist():
    """Compute color GIST descriptors for every entry in ``PICS_DIR``.

    The descriptors are stored in a dict keyed by the full path of each
    picture and pickled to ``PICS_DIR + '.gists'``. Returns None.
    """
    gists = {}
    for pic in os.listdir(PICS_DIR):
        path = os.path.join(PICS_DIR, pic)
        logging.debug('GIST computing : ' + path)
        im = Image.open(path)
        try:
            gists[path] = leargist.color_gist(im)
        finally:
            # Release the file handle promptly; directories can be large.
            im.close()
    # Context manager so the pickle file is flushed and closed
    # deterministically.
    with open(PICS_DIR + '.gists', 'wb') as out:
        pickle.dump(gists, out)
Exemple #19
0
    def _decoder_callback(self, frame):
        """Append the color GIST of ``frame`` to ``self.gists`` for key frames.

        Only frames whose ``framenumber`` is in ``self.keys`` are processed;
        the frame data is read as a raw RGBA buffer of
        ``frame.width`` x ``frame.height``.

        :raises NameError: if the ``leargist`` module cannot be imported
        """
        try:
            import leargist
        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must not be
            # relabelled as a missing module.
            raise NameError("Module leargist seems to be unavailable, please install it")

        if frame.framenumber not in self.keys:
            return
        im = Image.frombuffer("RGBA", (frame.width, frame.height), frame.data, "raw", "RGBA", 0, 1)
        self.gists.append(leargist.color_gist(im))
Exemple #20
0
def GIST_descriptor(dir_im, img):
    """Return the color GIST descriptor of image ``img`` inside ``dir_im``.

    The path is built as ``dir_im + "/" + str(img)``; the image is passed to
    ``leargist.color_gist`` at its original size.
    """
    image_path = dir_im + "/" + str(img)
    picture = Image.open(image_path)
    return leargist.color_gist(picture)
def get_gist_features(data_path, pickle_name=None):
    """Compute color GIST descriptors for a sequence of image paths.

    :param data_path: indexable sequence of image file paths
    :param pickle_name: optional output path; when given, the feature matrix
        is also pickled there with protocol 2
    :return: array of shape ``(len(data_path), 960)``, one descriptor per row
        (previously the matrix was computed but never returned)
    """
    size = len(data_path)
    gist_features = np.zeros((size, 960))
    for i, path in enumerate(data_path):
        if i % 500 == 0:
            print("{}/{}".format(i, size))
        im = Image.open(path)
        gist_features[i, :] = leargist.color_gist(im)

    if pickle_name is not None:
        with open(pickle_name, 'wb') as out:
            pickle.dump(gist_features, out, protocol=2)
    return gist_features
Exemple #22
0
def search():
    """Render an HTML page with the nearest stored images for an uploaded one.

    Reads the upload from ``request.files["image"]``, draws its 960-value
    GIST as a bar chart next to an inline PNG of the image, then appends the
    type breakdown and the results returned by ``process``. Returns the page
    as one string.
    """
    def png_base64(picture):
        # Inline PNG payload for a data: URI.
        buf = StringIO.StringIO()
        picture.save(buf, format="PNG")
        return base64.b64encode(buf.getvalue())

    def bar_cells(values):
        # One <b><i> cell per descriptor value, height given as a percentage.
        return ['<b><i style="height:%f%%"></i></b>' % (values[i] * 100)
                for i in xrange(960)]

    query_image = Image.open(request.files["image"].stream)
    query_gist = leargist.color_gist(query_image)

    html = [
        """
  <style type="text/css">
  *{margin:0;padding:0;}
  h1{font:bold 3em/2em Helvetica,sans-serif;}
  h1 a{font:11px sans-serif;color:#39c;}
  html{background-color:#f8f8f8;}
  body{width:1000px;margin:0 auto;}
  .bar{position:relative}
  b{height:70px;float:left;width:1px;display:block;position:relative;}
  i{background-color:#000;display:block;width:1px;position:absolute;bottom:0;}
  .bar{width:960px;height:70px;border:1px solid #eee;background-color:#fff;margin-left:70px;}
  .bar{margin-bottom:5px;}
  img{position:absolute;width:70px;left:-5.5em;}
  .breakdown div{float:left;display:block;height:50px;border-left:1px solid #999;margin-left:-1px;text-align:center;}
  .breakdown{height:50px;margin-bottom:10px;font:0.9em/50px sans-serif;color:#333;}
  strong{position:absolute;top:0.5em;left:0.5em;font:0.9em sans-serif;color:#999;}
  </style>
  """
    ]

    encoded_query = png_base64(query_image)
    html.append('<h1>Image Search <a href="/">reset search</a></h1>')
    html.append('<div class="bar"><img src="data:image/png;base64,%s"/>' % encoded_query)
    html.extend(bar_cells(query_gist))
    html.append("</div>")
    html.append('<br style="height:5em;display:block;"/>')

    (results, types) = process(query_image)

    # Largest weight first: `types` is walked in reverse of its sorted order.
    html.append('<div class="breakdown">')
    for (weight, key) in types[::-1]:
        html.append('<div style="width:%0.2f%%">%s (%0.1f%%)</div>' % (weight * 100, key, weight * 100))
    html.append("</div>")

    for hit_image, hit_gist, hit_type in results:
        html.append('<div class="bar"><img src="data:image/png;base64,%s"/>' % png_base64(hit_image))
        html.extend(bar_cells(hit_gist))
        html.append("<strong>%s</strong></div>" % hit_type)

    return "".join(html)
Exemple #23
0
def main_proc(query,
              image_path,
              random=False,
              query_cache=False,
              image_cache=False):
    """
    Return distance of a (query, image) pair
    """
    if random:
        return np.random.rand()

    global img2gist, query_miss, hash_miss, query_top10_cache, image_top10_cache
    if img2gist is None:
        img2gist = get_img2gist()

    im = Image.open(image_path)
    im = crop_resize(im, normal_size, True)
    desc = leargist.color_gist(im)

    if query_cache:
        A = query_top10_cache
    else:
        t1 = -time.time()
        A = query_top10_images(query)
        query_top10_cache = A
        print 'time for query_top10_images:', time.time() + t1

    if image_cache:
        B = image_top10_cache
    else:
        t2 = -time.time()
        B = gist_top10_images(image_path)
        image_top10_cache = B
        print 'time for gist_top10_images:', time.time() + t2

    if not A:
        if not query_cache:
            query_miss += 1
        return np.random.rand()
    if not B:
        hash_miss += 1
    sim_set = []

    t3 = -time.time()
    for i in A:
        gist_i = img2gist[i]
        for j in B:
            v = np.sum((gist_i - img2gist[j])**2)
            sim_set.append(v)
        sim_set.append(np.sum((gist_i - desc)**2))
    print 'time for compare:', time.time() + t3

    return min(sim_set)
Exemple #24
0
    def test_all(self):
        """Check reference values and descriptor lengths of ``color_gist``.

        Covers the reference image ``ar.ppm`` (first four and last three
        values), a 256x256 resize with default and ``nblocks=3`` settings, a
        grayscale conversion, and two JPEG files.
        """
        reference = Image.open("../lear_gist/ar.ppm")
        descr = leargist.color_gist(reference)
        expected_head = numpy.array([0.0579, 0.1926, 0.0933, 0.0662])
        expected_tail = numpy.array([0.0563, 0.0575, 0.0640])
        self.assertTrue(numpy.allclose(descr[:4], expected_head, atol=1e-3))
        self.assertTrue(numpy.allclose(descr[-3:], expected_tail, atol=1e-3))

        resized = reference.resize((256, 256), PIL.Image.ANTIALIAS)
        self.assertEqual(len(leargist.color_gist(resized)), 960)
        self.assertEqual(len(leargist.color_gist(resized, nblocks=3)), 540)

        grayscale = resized.convert('L')
        self.assertEqual(len(leargist.color_gist(grayscale)), 960)

        for jpeg_name in ("roofs1.jpg", "roofs2.jpg"):
            picture = Image.open(jpeg_name)
            self.assertEqual(len(leargist.color_gist(picture)), 960)
Exemple #25
0
def main_proc(query, image_path, random=False, query_cache=False, image_cache=False):
    """
    Return distance of a (query, image) pair
    """
    if random:
        return np.random.rand()

    global img2gist, query_miss, hash_miss, query_top10_cache, image_top10_cache
    if img2gist is None:
        img2gist = get_img2gist()

    im = Image.open(image_path)
    im = crop_resize(im, normal_size, True)
    desc = leargist.color_gist(im)

    if query_cache:
        A = query_top10_cache
    else:
        t1 = -time.time()
        A = query_top10_images(query)
        query_top10_cache = A
        print 'time for query_top10_images:', time.time() + t1

    if image_cache:
        B = image_top10_cache
    else:
        t2 = -time.time()
        B = gist_top10_images(image_path)
        image_top10_cache = B
        print 'time for gist_top10_images:', time.time() + t2

    if not A:
        if not query_cache:
            query_miss += 1
        return np.random.rand()
    if not B:
        hash_miss += 1
    sim_set = []

    t3 = -time.time()
    for i in A:
        gist_i = img2gist[i]
        for j in B:
            v = np.sum((gist_i - img2gist[j]) ** 2)
            sim_set.append(v)
        sim_set.append(np.sum((gist_i - desc) ** 2))
    print 'time for compare:', time.time() + t3

    return min(sim_set)
Exemple #26
0
def create_color_gist(f_list):
    """Return a dict mapping image keys to color GIST descriptors as lists.

    Each key is the image path with every ``_`` replaced by ``/``; each value
    is the descriptor converted with ``tolist()`` so it can be serialised to
    JSON. Progress is printed after every 10 images.
    """
    descriptors = {}
    for done, image_path in enumerate(f_list, 1):
        # load the image and compute the descriptors
        picture = Image.open(image_path)
        descriptor = leargist.color_gist(picture)
        # key: image path, value: descriptors
        descriptors[image_path.replace('_', '/')] = descriptor.tolist()
        if done % 10 == 0:
            print("processed %i images" % done)
    return descriptors
Exemple #27
0
 def search(self, img_path):
     """Return the paths of up to 64 indexed images closest to ``img_path``.

     The query's color GIST is centred with ``self.mean``, projected with
     ``self.pca`` and ``self.rotation``, and turned into an integer code by
     adding ``2 << i`` for every non-negative component ``i``. Entries of
     ``self.cache`` are ranked by ``utils.hamming_dist`` to that code.
     """
     query_image = Image.open(img_path)
     descriptor = leargist.color_gist(query_image)
     code_vec = (descriptor - self.mean).dot(self.pca).dot(self.rotation)
     code = sum(2 << pos
                for pos in range(code_vec.shape[0])
                if code_vec[pos] >= 0)
     ranked = [(utils.hamming_dist(code, entry[0]),
                os.path.join(self.dir, entry[1]))
               for entry in self.cache]
     # Find 64 similar images
     nearest = heapq.nsmallest(64, ranked)
     return [path for _, path in nearest]
Exemple #28
0
 def search(self, img_path):
     """Return the paths of up to 64 indexed images closest to ``img_path``.

     The query's color GIST is centred with ``self.mean``, projected with
     ``self.pca`` and ``self.rotation``, and turned into an integer code by
     adding ``2 << i`` for every non-negative component ``i``. Entries of
     ``self.cache`` are ranked by ``utils.hamming_dist`` to that code.
     """
     query_image = Image.open(img_path)
     descriptor = leargist.color_gist(query_image)
     code_vec = (descriptor - self.mean).dot(self.pca).dot(self.rotation)
     code = sum(2 << pos
                for pos in range(code_vec.shape[0])
                if code_vec[pos] >= 0)
     ranked = [(utils.hamming_dist(code, entry[0]),
                os.path.join(self.dir, entry[1]))
               for entry in self.cache]
     # Find 64 similar images
     nearest = heapq.nsmallest(64, ranked)
     return [path for _, path in nearest]
def main():
    """Command-line entry point: print the shape and first 4 values of an image's GIST.

    Takes one positional argument, the image file name.
    """
    parser = argparse.ArgumentParser(description='compute gist descriptor')
    parser.add_argument(dest='imgfile', help='image file name')
    options = parser.parse_args()

    picture = Image.open(options.imgfile)
    desc = leargist.color_gist(picture)

    print(desc.shape)
    print(desc[:4])
def gist(index):
    """Return the standardised color GIST of tiny image ``index`` as a list.

    The image is fetched from the ``tinyimages`` DAL dataset, reshaped to
    32x32x3 in Fortran order, and its descriptor is shifted by its mean and
    divided by its standard deviation.
    """
    import DAL
    import scipy
    import leargist
    import math
    import numpy as np
    dataset = DAL.create('tinyimages')
    pixels = dataset.byid(index).reshape(32, 32, 3, order="F").copy()
    picture = scipy.misc.toimage(pixels)
    vec = np.array(leargist.color_gist(picture))
    sd = math.sqrt(np.var(vec))
    mu = np.mean(vec)
    # normalization: zero mean, unit variance
    return list((vec - mu) / sd)
Exemple #31
0
def get_gist(fname):
    """Get GIST image descriptor (image reshaped to square).

    :param fname: path of the image file to describe
    :return: the descriptor computed by ``color_gist``; a length-960 vector
        filled with -1 when the image cannot be opened or the descriptor
        cannot be computed
    """
    try:
        im = Image.open(fname)
    except IOError:
        # cannot open image
        print("cannot open: %s" % fname)
        return np.ones((960,)) * -1
    # resize to square image, for non-square GIST does not make sense
    width, height = im.size
    if width != height:
        d = min(width, height)
        im = im.resize((d, d), Image.ANTIALIAS)
    # calculate gist
    try:
        gist = color_gist(im)
    except Exception:
        # Something went wrong (e.g. the image is too small).  Catch Exception
        # rather than everything so Ctrl-C / SystemExit still propagate.
        print("error calculating gist: %s" % fname)
        return np.ones((960,)) * -1
    return gist
Exemple #32
0
    def get_features(self, img_path):
        """Compute network activations and a GIST descriptor for one image.

        :param img_path: path of the image file
        :return: dict with ``'classes'`` (flattened ``fc7`` blob data after a
            forward pass) and ``'gist'`` (colour GIST descriptor of the image
            resized so that its longest side is about 250 pixels)
        """
        layer = 'fc7'

        # The network is reshaped to take a single 3x227x227 input.
        self.net.blobs['data'].reshape(1, 3, 227, 227)
        loaded = caffe.io.load_image(img_path)
        self.net.blobs['data'].data[...] = self.transformer.preprocess('data', loaded)
        self.net.forward(end=layer)

        # GIST runs on a resized copy, scaled so the longest side is ~250 px.
        picture = Image.open(img_path)
        scale = 250.0 / max(picture.size)
        target_size = (int(picture.size[0] * scale), int(picture.size[1] * scale))
        picture = picture.resize(target_size, Image.ANTIALIAS)

        result = {}
        result['classes'] = self.net.blobs[layer].data.flatten()
        result['gist'] = leargist.color_gist(picture)
        return result
Exemple #33
0
    def load_features(self,
                      feats=['GIST', 'picodes2048', 'classemes'],
                      features_path='../features/images/',
                      images_path='../images/'):
        """
        load_features takes as input the list of desired features and computes the unary feature vector
        :param feats: the features we want to take into account (never modified by this method)
        :param features_path: the path to the picodes and classemes features
        :param images_path: the path to the downloaded images -- In case images are not downloaded None.
        if images_path == None get image through url. Used for GIST features
        :return: It saves result in self.__features as a unary np.array
        """

        # Work on a private copy: removing 'GIST' from ``feats`` itself would
        # mutate the shared default list (so later calls would silently skip
        # GIST) as well as any list passed in by the caller.
        remaining = list(feats)

        features = []
        if 'GIST' in remaining:
            remaining.remove('GIST')
            if images_path is None:
                image_url = self.__url.replace('/html', '/detail')
                image_url = image_url.replace('.html', '.jpg')

                # get image through url
                response = requests.get(image_url)
                painting_file = BytesIO(response.content)
                img = Image.open(painting_file)
            else:
                # load image from disc
                img = Image.open(images_path + self.__name + '.png')

            # compute GIST features and append to features
            features.extend(leargist.color_gist(img).tolist())

        for feat_name in remaining:
            file_name = features_path + self.__name + '_' + feat_name + '.dat'
            feat = np.fromfile(file_name, dtype='float32')
            # the first two float32 values of each file are skipped
            feat = feat[2:]
            features.extend(feat.tolist())

        # L2-normalise the concatenated feature vector
        features = np.array(features)
        norm = np.linalg.norm(features)
        self.__features = features / norm
Exemple #34
0
 def classifyImage(self, feature_X, label_y, number):
     """Classify ``self.filename`` with a 5-nearest-neighbour model.

     :param feature_X: training feature matrix passed to ``knn.fit``
     :param label_y: training labels passed to ``knn.fit``
     :param number: lookup from predicted label index to class name

     Stores the query feature in ``self.feature`` and emits
     ``self.malwarSignal`` with ``(1, [label index, class name, probabilities])``.
     """
     # Shrink to 64x64 before computing the descriptor.
     thumbnail = Image.open(self.filename).resize((64, 64), Image.ANTIALIAS)
     descriptor = leargist.color_gist(thumbnail)  # 960 values

     # Original note: the image is grayscale, so only the first 320 values are used.
     query_feature = descriptor[0:320].reshape(1, -1)
     self.feature = query_feature

     # Fit a distance-weighted k-NN on the supplied features and labels.
     classifier = KNeighborsClassifier(5, weights='distance')
     classifier.fit(feature_X, label_y)

     predicted = int(classifier.predict(query_feature))
     class_name = number[predicted]
     probabilities = classifier.predict_proba(query_feature)
     self.malwarSignal.emit(1, [predicted, class_name, probabilities])
Exemple #35
0
    def load_features(self, feats=['GIST', 'picodes2048', 'classemes'], features_path='../features/images/', images_path='../images/'):
        """
        load_features takes as input the list of desired features and computes the unary feature vector
        :param feats: the features we want to take into account (never modified by this method)
        :param features_path: the path to the picodes and classemes features
        :param images_path: the path to the downloaded images -- In case images are not downloaded None.
        if images_path == None get image through url. Used for GIST features
        :return: It saves result in self.__features as a unary np.array
        """

        # Work on a private copy: removing 'GIST' from ``feats`` itself would
        # mutate the shared default list (so later calls would silently skip
        # GIST) as well as any list passed in by the caller.
        remaining = list(feats)

        features = []
        if 'GIST' in remaining:
            remaining.remove('GIST')
            if images_path is None:
                image_url = self.__url.replace('/html', '/detail')
                image_url = image_url.replace('.html', '.jpg')

                # get image through url
                response = requests.get(image_url)
                painting_file = BytesIO(response.content)
                img = Image.open(painting_file)
            else:
                # load image from disc
                img = Image.open(images_path+self.__name+'.png')

            # compute GIST features and append to features
            features.extend(leargist.color_gist(img).tolist())

        for feat_name in remaining:
            file_name = features_path+self.__name+'_'+feat_name+'.dat'
            feat = np.fromfile(file_name, dtype='float32')
            # the first two float32 values of each file are skipped
            feat = feat[2:]
            features.extend(feat.tolist())

        # L2-normalise the concatenated feature vector
        features = np.array(features)
        norm = np.linalg.norm(features)
        self.__features = features/norm
Exemple #36
0
def create_color_gist(datapath):
    """Compute colour GIST descriptors for every ``*.jpg*`` file under a prefix.

    :param datapath: path prefix; the glob pattern is ``datapath + '*.jpg*'``
    :return: dict mapping each image path to its descriptor

    A progress line is printed after every processed image.
    """
    import leargist

    imgdict = {}
    for processed, image_path in enumerate(glob.glob(datapath + '*.jpg*'), 1):
        # key: image path, value: descriptor
        imgdict[image_path] = leargist.color_gist(Image.open(image_path))
        print("processed %i images" % processed)

    return imgdict
Exemple #37
0
def gist_top10_images(img):
    """Return the names of the 10 nearest neighbours of ``img`` in the LSH index.

    The image is opened, passed through ``crop_resize`` with ``normal_size``,
    described with colour GIST and looked up in the module-level ``lsh``
    index using Euclidean distance.

    Statistics recorded by the original author for the indexed dataset:

    32 bit hash:
    bucket ratio: 225410/4294967296
    max conflicts: 8691

    16 bit hash:
    bucket ratio: 8135/65536
    max conflicts: 93329
    """
    global lsh

    query_image = crop_resize(Image.open(img), normal_size, True)
    descriptor = leargist.color_gist(query_image)
    matches = lsh.query(descriptor, num_results=10, distance_func="euclidean")

    # The first element of each match is a JSON string; the name is its second item.
    names = []
    for match in matches:
        names.append(json.loads(match[0])[1])
    return names
Exemple #38
0
def gistfeatures(im, maxsize=None):
    """Extract the GIST descriptor (dims=960) of the given image.

    When ``maxsize`` is given the image is first resized to exactly that
    size with ``im.resize`` (antialiased).

    Uses the pyleargist library:
        http://pypi.python.org/pypi/pyleargist/
         or
        sudo easy_install pyleargist

    GIST extraction seems to scale about linearly with the number of pixels,
    so resizing is often essential for fast speed.

    Returns None if the descriptor computation raises an exception.
    """
    import leargist

    image = im.resize(maxsize, Image.ANTIALIAS) if maxsize else im
    try:
        return leargist.color_gist(image)
    except Exception:
        # some problem with leargist
        return None
Exemple #39
0
def gist(path):
    """Open the image at ``path`` and return its colour GIST descriptor."""
    return leargist.color_gist(Image.open(path))
 def gist_feature(self, fn):
     """Open the image file ``fn`` and return its colour GIST descriptor."""
     return leargist.color_gist(Image.open(fn))
Exemple #41
0
def grayscale_gist(image_filename):
    """Return the first third of the colour GIST descriptor of a grayscale image.

    :param image_filename: path of the image file
    :return: the leading ``size // 3`` values of the descriptor computed on
        the image converted to mode ``'L'``
    """
    im = Image.open(image_filename)
    im = im.convert('L')  # Convert to luminosity only, i.e. grayscale
    descriptors = leargist.color_gist(im)

    # Floor division keeps the slice bound an integer on Python 3 as well;
    # a float bound raises TypeError there.
    return descriptors[:descriptors.size // 3]
Exemple #42
0
def numeric(filename, i, goal):
    """Return ``(distance to goal, filename)`` for the image's GIST descriptor.

    ``i`` is accepted but not used.
    """
    descriptor = leargist.color_gist(Image.open(filename))
    score = distance(descriptor, goal)
    return (score, filename)
Exemple #43
0
def gist_descriptor(image):
    """Return the colour GIST descriptor of ``image``."""
    descriptor = leargist.color_gist(image)
    return descriptor
Exemple #44
0
def pack(
    input_folder,
    index_file,
    data_output_file,
    label_output_file=None,
    seed=42,
    dtype=np.float32,
    transform=None,
    has_category=True,
):
    """Pack picture files as numpy arrays with category folder as labels

    :param input_folder: folder holding the pictures; with ``has_category``
        it holds one sub-folder per category
    :param index_file: text file that receives the packed file paths, one per
        line, in packing order
    :param data_output_file: destination of the ``np.save`` dump; gzip
        compressed when the name ends with ``.gz``
    :param label_output_file: accepted but not used by this function
    :param seed: seed of the private random generator used for shuffling
    :param dtype: dtype of the packed array
    :param transform: ``"gist"`` stores the 960-value GIST descriptor of each
        picture; anything else stores the flattened ``img_to_array`` output
    :param has_category: when true, every category is truncated to the size
        of the smallest one so that all categories are equally represented
    """

    rng = random.Random(seed)
    if has_category:
        all_filenames = [
            (category, os.path.join(input_folder, category, filename))
            for category in os.listdir(input_folder)
            for filename in os.listdir(os.path.join(input_folder, category))
        ]

        # count the number of picture by category and drop excedent so that
        # all categories have equal number of samples
        rng.shuffle(all_filenames)

        totals = {}
        for category, _ in all_filenames:
            totals[category] = totals.get(category, 0) + 1
        limit = min(totals.values())

        resampled_filenames = []
        kept = {}
        for category, path in all_filenames:
            kept[category] = kept.get(category, 0) + 1
            if kept[category] <= limit:
                resampled_filenames.append(path)
    else:
        resampled_filenames = [os.path.join(input_folder, filename) for filename in os.listdir(input_folder)]
        rng.shuffle(resampled_filenames)

    # ``open`` replaces the Python-2-only ``file`` builtin; the index is plain
    # text, so it is written in text mode and closed deterministically.
    with open(index_file, "w") as f:
        f.write("\n".join(resampled_filenames))
        f.write("\n")

    # The first picture defines the width/height used for every raw array.
    reference = Image.open(resampled_filenames[0])

    w, h = reference.size

    dim = w * h
    if transform == "gist":
        dim = 960  # hardcoded for now

    # TODO: add microthumb transform as baseline too

    data_array = np.zeros((len(resampled_filenames), dim), dtype=dtype)
    for i, filepath in enumerate(resampled_filenames):
        im = Image.open(filepath)
        if transform == "gist":
            data_array[i, :] = leargist.color_gist(im)
        else:
            data_array[i, :] = img_to_array(im, w=w, h=h, dtype=dtype).flatten()

    # Close the output explicitly: a GzipFile only writes its trailer on
    # close(), so leaving that to garbage collection risks a truncated file.
    if data_output_file.endswith(".gz"):
        with GzipFile(data_output_file, "wb") as out:
            np.save(out, data_array)
    else:
        with open(data_output_file, "wb") as out:
            np.save(out, data_array)
def gist(path):
    """Open the image at ``path`` and return its colour GIST descriptor."""
    return leargist.color_gist(Image.open(path))
    for ii in range(temp2, temp3):
        y[ii] = jj
    temp2 = temp2 + int(temp1[jj + 1])

import Image, leargist

# Build the feature matrix: one row per image (sum(no_imgs) rows in total),
# each holding the first 320 values of the image's GIST descriptor.
X = numpy.zeros((sum(no_imgs), 320))  # Feature Matrix
cnt = 0  # index of the next row of X to fill
for i in range(len(list_fams)):
    # Enter the directory named by list_fams[i]; the matching chdir('..') at
    # the end of the iteration assumes it is a direct child of the current
    # working directory.
    os.chdir(list_fams[i])
    img_list = glob.glob('*.png')  # Getting only 'png' files in a folder
    for j in range(len(img_list)):
        im = Image.open(img_list[j])
        im1 = im.resize((64, 64), Image.ANTIALIAS)
        # for faster computation
        des = leargist.color_gist(im1)
        # keep only the first 320 descriptor values
        X[cnt] = des[0:320]
        cnt = cnt + 1
    os.chdir('..')
import random
from sklearn.cross_validation import StratifiedKFold
from sklearn.utils import shuffle

n_samples, n_features = X.shape
# Index list 0..n_samples-1.  It is materialised as a list because
# random.shuffle needs a mutable sequence (a Python 3 ``range`` is not).
p = list(range(n_samples))
random.seed(random.random())
random.shuffle(p)  # the index list is now shuffled

# Apply the same permutation to features and labels so they stay aligned.
X, y = X[p], y[p]  # both the arrays are now shuffled

kfold = 10  # no. of folds (better to have this at the start of the code)
Exemple #47
0
# print filename
# im = Image.open(file_path)
# dsr = leargist.color_gist(im)
# dsr = np.array(dsr)
# gists[filename] = dsr
# if cnt % 500 == 0:
# print cnt

# Convert every training image to a GIST descriptor: keep it in ``gists`` and
# dump it to ``result_path`` (file name with 'jpg' replaced by 'vec').
cnt = 0  # number of images converted successfully so far
print('Converting ...')

for filename in train_file_list:
    file_path = os.path.join(source_dir, filename)
    try:
        # Opening the image is inside the try block so that an unreadable
        # file is reported and skipped like any other conversion failure.
        im = Image.open(file_path)
        dsr = leargist.color_gist(im)
        dsr = np.array(dsr)
        gists[filename] = dsr
        # progress report every 100 converted images
        if cnt % 100 == 0:
            print(cnt)
        cnt += 1
        dsr.tofile(os.path.join(result_path, filename.replace('jpg', 'vec')))

    except Exception:
        # Catch Exception, not everything, so that Ctrl-C still stops the run.
        print('Error at %s' % filename)
        continue

print('Done!')
import concurrent.futures
import glob
Exemple #48
0
			buf=afile.read(BLOCKSIZE)
	hash=(hasher.hexdigest())
	filename = x;
	f = open(filename,'rb');
	ln = os.path.getsize(filename); # length of file in bytes
	width = 256;
	rem = ln%width; 
	a = array.array("B"); # uint8 array
	a.fromfile(f,ln-rem);
	f.close(); 
	g = numpy.reshape(a,(len(a)/width,width));
	g = numpy.uint8(g);
	scipy.misc.imsave('images/'+hash+'.png',g); # save the image
	im = Image.open('images/'+hash+'.png');
	im1 = im.resize((64,64)); # for faster computation
	des = leargist.color_gist(im1); # 960 values
	feature = des[0:320]; # since the image is grayscale, we need only first 320 values
	sample[y]=hash; 
	sample_name[y]=list[y];

	corpus_features[y]=feature;


# All samples have been turned into images and described.
print('Done creating malware images')

# Persist the feature matrix, then read it back from disk as X.
numpy.save('corpus_features.npy', corpus_features)
X = numpy.load('corpus_features.npy')

# Mean-shift clustering model with a fixed bandwidth of 0.2.
ms1 = MeanShift(bandwidth=0.2)
    def gist_int(self, im, mask, centers, imAll):
        '''
        Compute a GIST descriptor for the masked region of an image.

        Steps performed by the code below:
          1. When ``im`` has three dimensions, ``imAll`` is converted with
             ``self.rgb2gray``, rounded and cast to int.
          2. The bounding box of the non-zero entries of ``mask`` is found and
             the image is padded via ``self.pad`` by half the height/width
             difference of that box.
          3. A window centred on the box, with half-size
             ``floor(max(h, w) / 2)``, is cut out.
          4. The window is passed through ``self.imresizecrop`` with the first
             two dimensions of ``centers['gist_centers']``.
          5. The result is converted to a PIL image in mode 'L' and described
             with ``leargist.color_gist`` using 4 blocks.

        Returns the first 320 values of the descriptor (the original note
        here read "320x1 double").
        '''
        # NOTE(review): ``log`` is only referenced by commented-out timing calls.
        log = Logger()     
#        i = Image.fromarray(im)
        #im2 = self.im2double(im)
        #im2 = Image.fromarray(im2,'L')
        #descriptors = leargist.color_gist(im2, nblocks=4)
        #return descriptors
        
        numberBlocks = 4
        # NOTE(review): ``I`` is only bound inside this branch; when
        # ``im.ndim != 3`` the ``self.pad(I, ...)`` call below raises NameError.
        if im.ndim == 3:#if ndims(im) == 3
            #img = Image.fromarray(imAll)
            #img.convert('LA')
             
            #I = np.array(img)
             
#            I = cv2.cvtColor(imAll, cv2.COLOR_BGR2GRAY)
            #log.start('round',1,1)
            I = np.round(self.rgb2gray(imAll)).astype(int)
            #log.update()
        
        
#            I = imAll.convert('LA')
#anschauen            I = rgb2gray(imAll)
#       
        #log.start('find', 1, 1)   
        # Bounding box (inclusive) of the non-zero mask entries.
        (y,x) = mask.nonzero()#[y x] = find(mask)
        y1 = min(y) 
        y2 = max(y) 
        x1 = min(x)
        x2 = max(x)
        h = y2-y1+1
        w=x2-x1+1
        #log.update()
        
        #log.start('padAmount',1, 1)
        # Positive when the box is taller than wide, negative when wider.
        padAmount = np.around((h-w)/2.0)#padAmount = round((h-w)/2)
        #log.update()
        #log.start('self.pad',1,1)
        # Mirrors the MATLAB padarray call kept in the comment below; the
        # exact semantics of self.pad are not visible here -- TODO confirm.
        I = self.pad(I,(max(-padAmount,0), max(padAmount,0)),symmetric=True)
        #log.update()
        #log.start('y,y max',1,1)
        #I = padarray(I,[max(-padAmount,0) max(padAmount,0)],'symmetric','both')
        # Centre of the box, shifted by the padding that was added.
        # NOTE(review): ``/`` here is integer division only under Python 2
        # with integer operands -- confirm before porting.
        mh = (y1+y2)/2+max(-padAmount,0) 
        #log.update()
        #log.start('y,x max',1,1)
        mw = (x1+x2)/2+max(padAmount,0)
        #log.update()
        #log.start('np.floor',1,1)
        # Half-size of the window cut out around (mh, mw).
        s = np.floor(max(h,w)/2)
        #log.update()
        #log.start('I[x,y,x,y,x]',1,1)
        I = I[int(np.fix(mh-s+0.5)):int(np.fix(mh+s))+1,int(np.fix(mw-s+0.5)):int(np.fix(mw+s))+1]
        ####I = I(fix(mh-s+.5):fix(mh+s),fix(mw-s+.5):fix(mw+s))
        #log.update()
        #log.start('centers',1,1)  
        G = centers['gist_centers']
        #log.update()
        #log.start('shape',1,1)
        # Only the first two dimensions of G are used, as the target size.
        (ro,co,_) = G.shape#[ro co ch] = size(G)
        #log.update()
        #log.start('imresizecrop',1,1)
        I = self.imresizecrop(I, (ro, co))
        #print 'aha'
        #log.update()
        #log.start('Copy',1,1)
        I = I.copy()
        #log.update()
        #log.start('fromArray',1,1)
        I = Image.fromarray(I)
        #log.update()
        #log.start('convert_L',1,1)
        I = I.convert('L')
        #log.update()
        #log.start('descriptors',1,1)
        descriptors = leargist.color_gist(I, numberBlocks)  # @UndefinedVariable
        #log.update()
        
        # Only the first 320 descriptor values are returned.
        return descriptors[0:320]
 def get(self, frame):
     """Return the colour GIST descriptor of the array ``frame``."""
     return leargist.color_gist(Image.fromarray(frame))
 def get_gist(self, img):
     """Return the colour GIST descriptor of ``img`` as a (1, 960) array.

     ``img`` is cast to ``uint8`` before being wrapped in a PIL image.
     """
     row = np.zeros((1, 960))
     picture = Image.fromarray(img.astype('uint8'))
     row[0, :] = leargist.color_gist(picture)
     return row
def gist(path):
    """Best-effort colour GIST descriptor of the image at ``path``.

    :param path: path of the image file
    :return: the descriptor, or None when opening the image or computing the
        descriptor raises an exception
    """
    try:
        im = Image.open(path)
        return leargist.color_gist(im)
    except Exception:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit are not silently turned into a None result.
        return None
Exemple #53
0
def gist_descriptor(image):
  """Return the colour GIST descriptor of ``image``."""
  descriptor = leargist.color_gist(image)
  return descriptor
Exemple #54
0
 def fun(pixels):
     """Return the colour GIST descriptor of the pixel array ``pixels``."""
     return leargist.color_gist(PIL.Image.fromarray(pixels))
Exemple #55
0
from PIL import Image
import leargist

from numpy.linalg import norm
from numpy.testing import assert_equal
from scipy.misc import imread

# Check that the PIL-based and the numpy-based entry points of leargist give
# the same descriptor for the same picture.
im = Image.open('lear_gist/ar.ppm')
image = imread(im.filename)
# No trailing comma here: it would turn ``gist`` into a one-element tuple.
gist = leargist.color_gist(im)
gist_numpy = leargist.color_gist_numpy(image)
# Compare the descriptors computed above instead of computing them again.
assert_equal(gist, gist_numpy)
Exemple #56
0
def _existing_training_images(count=20000):
    """Yield (zero-based index, path) for each training_data/<index+1>.png that exists."""
    for index in range(count):
        name = "training_data/" + str(index + 1) + ".png"
        if os.path.isfile(name):
            yield index, name


def _report_random_forest(X, y, kf):
    """Select random-forest hyperparameters, cross-validate and print the scores."""
    numTree, depth = select_param_randomForest(X, y, kf)
    clf = RandomForestClassifier(n_estimators=numTree, max_depth=depth, criterion='entropy')
    accuracy = cv_performance(clf, X, y, kf)
    print('     Random forest with %d trees, each with max depth %d accuracy %f %f %f %f' % (numTree, depth, accuracy[0], accuracy[1], accuracy[2], accuracy[3]))


def _report_knn(X, y, kf):
    """Select k for a kNN classifier, cross-validate and print the scores."""
    k = select_param_kNN(X, y, kf)
    clf = KNeighborsClassifier(n_neighbors=k)
    accuracy = cv_performance(clf, X, y, kf)
    print('     KNN with %d neighbors accuracy %f %f %f %f' % (k, accuracy[0], accuracy[1], accuracy[2], accuracy[3]))


def _report_log_reg(X, y, kf):
    """Select output code and C for logistic regression, cross-validate and print the scores."""
    R_ovr = generate_output_codes(num_classes, 'ovr')
    R_ovo = generate_output_codes(num_classes, 'ovo')

    codes = {}
    codes["ovr"] = R_ovr
    codes["ovo"] = R_ovo

    c, code = select_param_logReg(X, y, kf, codes)
    clf = Multiclass(code, C=c, clf='logistic')
    accuracy = cv_performance(clf, X, y, kf)
    print('     Log Reg with %s output code, C = %f accuracy %f %f %f %f' % (code, c, accuracy[0], accuracy[1], accuracy[2], accuracy[3]))


def _report_svm_poly(X, y, kf):
    """Select hyperparameters for a polynomial-kernel SVM, cross-validate and print the scores."""
    oc, C, deg, gamma = select_param_poly(X, y, kf)
    clf = Multiclass(generate_output_codes(num_classes, oc), C=C, clf='svm', kernel='poly', degree=deg, gamma=gamma, coef0 = 1.0)
    accuracy = cv_performance(clf, X, y, kf)
    print('     SVM poly with %s output code, C = %f, degree = %f, gamma = %f accuracy %f %f %f %f' % (oc, C, deg, gamma, accuracy[0], accuracy[1], accuracy[2], accuracy[3]))


def _report_svm_rbf(X, y, kf):
    """Select hyperparameters for an RBF-kernel SVM, cross-validate and print the scores."""
    oc, Gamma, C = select_param_rbf(X, y, kf)
    clf = Multiclass(generate_output_codes(num_classes, oc), C=C, clf='svm', kernel='rbf', gamma=Gamma)
    accuracy = cv_performance(clf, X, y, kf)
    print('     SVM RBF with %s output code, C = %f, Gamma = %f accuracy %f %f %f %f' % (oc, C, Gamma, accuracy[0], accuracy[1], accuracy[2], accuracy[3]))


def _run_reports(reports, X, y, kf):
    """Run each report function on (X, y, kf) in the given order."""
    for report in reports:
        report(X, y, kf)


def main():
    """Compare classifiers on raw, GIST, HOG and PCA-reconstructed image features.

    Reads the labels from ``trainLabels_modified.csv`` and the images found
    under ``training_data/``, builds each feature matrix, and for every
    feature set prints the cross-validated scores of kNN, random forest,
    polynomial/RBF SVM and logistic-regression classifiers after
    hyperparameter selection.  Ends the process with ``exit(0)``.
    """

    np.random.seed(1234)

    # display_feature()

    classes = ['frog', 'deer', 'ship', 'airplane']
    all_y = np.zeros(20000)
    # The label file is closed as soon as it has been read.  The first CSV
    # row is skipped; row r (r >= 1) fills all_y[r - 1].
    with open('trainLabels_modified.csv', 'rb') as original_labels:
        for row_no, row in enumerate(csv.reader(original_labels)):
            if row_no > 0:
                all_y[row_no - 1] = classes.index(row[1])

    # Evaluation order is kept exactly as in each original section, because
    # the classifiers may draw from the global numpy random state seeded above.
    raw_reports = (_report_random_forest, _report_knn, _report_log_reg,
                   _report_svm_poly, _report_svm_rbf)
    default_reports = (_report_knn, _report_random_forest, _report_svm_poly,
                       _report_svm_rbf, _report_log_reg)

    # Raw Features
    train_X_raw = np.zeros((3000, 3072))
    train_y = np.zeros(3000)
    for i, (index, name) in enumerate(_existing_training_images()):
        train_X_raw[i] = mpimg.imread(name).flatten()
        train_y[i] = all_y[index]

    # create stratified folds (10-fold CV)
    kf = StratifiedKFold(train_y, n_folds=10)

    print("Raw feature:")
    # Author's notes for raw features:
    #   random forest best: numTree = 500, max depth = 500
    #   kNN best: k = 5
    #   SVM RBF best: ovo, c = 10, gamma = 0.00390625
    _run_reports(raw_reports, train_X_raw, train_y, kf)

    # Extract Features using GIST Descriptor
    train_X_gist = np.zeros((3000, 960))
    for i, (_, name) in enumerate(_existing_training_images()):
        train_X_gist[i] = leargist.color_gist(Image.open(name))

    print("GIST (without PCA):")
    # Author's notes for GIST features:
    #   kNN: k = 12
    #   random forest: numTree = 500, max depth = 200
    #   SVM RBF: ovo, C = 10, Gamma = 0.25
    _run_reports(default_reports, train_X_gist, train_y, kf)

    # Extract Features using HOG Descriptor
    # (the original HOG and PCA sections repeated the GIST notes above
    # verbatim; they may be copy-paste leftovers)
    train_X_hog = np.zeros((3000, 324))
    for i, (_, name) in enumerate(_existing_training_images()):
        train_X_hog[i] = ft.hog(cv2.imread(name, 0))

    print("HOG (without PCA):")
    _run_reports(default_reports, train_X_hog, train_y, kf)

    # Using PCA on raw features
    l_list = [50, 100, 200, 500]
    U_train, mu_train = util.PCA(train_X_raw)

    for l in l_list:
        print("PCA with %d principal components on raw features:" % l)
        Z_train, Ul_train = util.apply_PCA_from_Eig(train_X_raw, U_train, l, mu_train)
        # The classifiers are evaluated on the reconstruction, not on Z_train.
        train_X_rec = util.reconstruct_from_PCA(Z_train, Ul_train, mu_train)
        _run_reports(default_reports, train_X_rec, train_y, kf)

    exit(0)