def write_corpus(fname, corpus, progress_cnt=1000, index=False, num_terms=None, metadata=False):
    """
    Save the vector space representation of an entire corpus to disk.

    Documents are written one at a time, so the whole corpus is allowed to be
    larger than the available RAM.

    Args:
        fname: Path handed to `MmWriter`. When metadata is collected it is
            pickled to `fname + '.metadata.cpickle'`.
        corpus: Iterable of documents. If it has a `metadata` attribute, that
            attribute is set to `metadata` for the duration of the iteration
            and is always restored to its previous value afterwards.
        progress_cnt: A progress message is logged whenever the document
            number is a multiple of this value.
        index: If true, the writer's file position before each document is
            recorded and the list of positions is returned.
        num_terms: Number of terms to put in the header. When falsy, one plus
            the largest id reported by `write_vector` is used.
        metadata: If true (and `corpus` has a `metadata` attribute), every
            item yielded by `corpus` is unpacked as a `(bow, data)` pair.

    Returns:
        The list of offsets when `index` is true, otherwise None.
    """
    mw = MmWriter(fname)
    try:
        # write empty headers to the file (with enough space to be overwritten later)
        mw.write_headers(-1, -1, -1)  # will print 50 spaces followed by newline on the stats line

        # calculate necessary header info (nnz elements, num terms, num docs) while writing out vectors
        _num_terms, num_nnz = 0, 0
        docno, poslast = -1, -1
        offsets = []
        docno2metadata = {}

        has_metadata_attr = hasattr(corpus, 'metadata')
        if has_metadata_attr:
            orig_metadata = corpus.metadata
            corpus.metadata = metadata
        else:
            metadata = False

        try:
            for docno, doc in enumerate(corpus):
                if metadata:
                    bow, data = doc
                    docno2metadata[docno] = data
                else:
                    bow = doc
                if docno % progress_cnt == 0:
                    logger.info("PROGRESS: saving document #%i", docno)
                if index:
                    posnow = mw.fout.tell()
                    if posnow == poslast:
                        # the previous document wrote nothing; flag its offset
                        offsets[-1] = -1
                    offsets.append(posnow)
                    poslast = posnow
                max_id, veclen = mw.write_vector(docno, bow)
                _num_terms = max(_num_terms, 1 + max_id)
                num_nnz += veclen
        finally:
            # Restore the caller's setting even when metadata is False or the
            # iteration fails; previously it was only restored when metadata
            # was requested.
            if has_metadata_attr:
                corpus.metadata = orig_metadata

        if metadata:
            utils.pickle(docno2metadata, fname + '.metadata.cpickle')

        num_docs = docno + 1
        num_terms = num_terms or _num_terms

        if num_docs * num_terms != 0:
            logger.info(
                "saved %ix%i matrix, density=%.3f%% (%i/%i)",
                num_docs, num_terms,
                100.0 * num_nnz / (num_docs * num_terms),
                num_nnz, num_docs * num_terms)

        # now write proper headers, by seeking and overwriting the spaces written earlier
        mw.fake_headers(num_docs, num_terms, num_nnz)
    finally:
        # always release the output file, including on failure
        mw.close()

    if index:
        return offsets
def _calibrate(images_path=IMAGES_PATH,
               chessboard_rows=CHESSBOARD_ROWS,
               chessboard_cols=CHESSBOARD_COLS,
               image_size=CALIBRATION_IMAGE_SIZE,
               calibration_pickle_file=CALIBRATION_PICKLE_FILE):
    """Collect chessboard corner correspondences from images and pickle them.

    Every file matched by the glob pattern `images_path` is read, resized to
    `image_size` when its shape differs, converted to grayscale and searched
    for a `chessboard_cols` x `chessboard_rows` corner pattern. Images in
    which no pattern is found are skipped.

    Returns:
        A tuple `(points_object, points_image)`: one shared grid of
        (col, row, 0) coordinates and one array of detected corners per
        image in which the pattern was found. The same tuple is pickled to
        `calibration_pickle_file`.
    """
    pattern_size = (chessboard_cols, chessboard_rows)

    # Reference grid: x/y take the integer corner indices, z stays 0.
    grid = np.zeros((chessboard_rows * chessboard_cols, 3), np.float32)
    grid[:, :2] = np.mgrid[:chessboard_cols, :chessboard_rows].T.reshape(-1, 2)

    points_object, points_image = [], []
    for path in glob.glob(images_path):
        pixels = imread(path)
        if pixels.shape != image_size:
            pixels = imresize(pixels, image_size)
        gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
        found, corners = cv2.findChessboardCorners(gray, pattern_size, None)
        if not found:
            continue
        points_object.append(grid)
        points_image.append(corners)

    calibration = (points_object, points_image)
    utils.pickle(calibration, calibration_pickle_file)
    return calibration
def gaussMixture(testX, goodSample, data=None, train=False, plot=False):
    """Return the fraction of `goodSample` rows that share `testX`'s mixture label.

    Args:
        testX: Array holding a single sample; it is flattened into one row
            before prediction.
        goodSample: 2-D array of reference samples, one per row.
        data: Training data. Required when `train` or `plot` is set.
        train: If true, fit a 3-component full-covariance GMM on `data` and
            pickle it; otherwise load the previously pickled estimator.
        plot: If true, show a 3-D scatter of the first three columns of
            `data`, coloured by predicted label.

    Returns:
        Number of matching rows divided by `goodSample.shape[0]`, as a float.

    Raises:
        ZeroDivisionError: if `goodSample` has no rows.
    """
    model_path = 'SrcTeam/capsuleData/capsule_gauss'
    if train:
        est = GMM(n_components=3, covariance_type='full')
        est.fit(data)
        utils.pickle(est, model_path)
    else:
        est = utils.unpickle(model_path)

    numGood = goodSample.shape[0]
    testData = np.reshape(testX, (1, testX.size))
    predLabel = est.predict(testData)

    numMatch = 0.0
    for i in range(numGood):
        # slice (not index) so each row stays 2-D for predict()
        if est.predict(goodSample[i:i + 1, :]) == predLabel:
            numMatch += 1

    if plot:
        fig = pl.figure()
        pl.clf()
        ax = Axes3D(fig)
        labels = est.predict(data)
        # builtin float: the np.float alias no longer exists in current NumPy
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels.astype(float))
        pl.show()

    return float(numMatch) / numGood
def k_means(testX, goodSample, data=None, train=False, plot=False):
    """Return the fraction of `goodSample` rows in the same k-means cluster as `testX`.

    Args:
        testX: A single sample; it is reshaped into one row before prediction.
        goodSample: 2-D array of reference samples, one per row.
        data: Training data. Required when `train` or `plot` is set.
        train: If true, fit a 3-cluster KMeans on `data` and pickle it;
            otherwise load the previously pickled estimator.
        plot: If true, show a 3-D scatter of the first three columns of
            `data`, coloured by the estimator's `labels_`.

    Returns:
        Number of matching rows divided by `goodSample.shape[0]`, as a float.

    Raises:
        ZeroDivisionError: if `goodSample` has no rows.
    """
    model_path = 'SrcTeam/capsuleData/capsule_k_means'
    if train:
        est = KMeans(3)
        est.fit(data)
        utils.pickle(est, model_path)
    else:
        est = utils.unpickle(model_path)

    numGood = goodSample.shape[0]
    # predict() expects (n_samples, n_features); present the sample as one row
    testLabel = est.predict(np.reshape(testX, (1, -1)))

    numMatch = 0.0
    for i in range(numGood):
        # slice (not index) so each row stays 2-D for predict()
        if est.predict(goodSample[i:i + 1, :]) == testLabel:
            numMatch += 1

    if plot:
        fig = pl.figure()
        pl.clf()
        ax = Axes3D(fig)
        # NOTE(review): labels_ describes the data the estimator was fitted
        # on; with train=False this presumably only lines up with `data` if
        # the same data is passed again - confirm against callers.
        labels = est.labels_
        # builtin float: the np.float alias no longer exists in current NumPy
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels.astype(float))
        pl.show()

    return float(numMatch) / numGood
def gaussMixture(sample, goodSample, data=None, train=True, plot=False):
    """Return the fraction of `goodSample` rows that share `sample`'s mixture label.

    Args:
        sample: A single sample; it is reshaped into one row before prediction.
        goodSample: 2-D array of reference samples, one per row.
        data: Training data. Required when `train` or `plot` is set.
        train: If true (the default), fit a 3-component full-covariance GMM
            on `data` and pickle it; otherwise load the pickled classifier.
        plot: If true, show a 3-D scatter of the first three columns of
            `data`, coloured by predicted label.

    Returns:
        Number of matching rows divided by `goodSample.shape[0]`, as a float.

    Raises:
        ZeroDivisionError: if `goodSample` has no rows.
    """
    model_path = 'data/capsule_gauss'
    if train:
        classifier = GMM(n_components=3, covariance_type='full')
        classifier.fit(data)
        utils.pickle(classifier, model_path)
    else:
        # Was `utils.classifier = unpickle(...)`, which left `classifier`
        # undefined on this branch.
        classifier = utils.unpickle(model_path)

    numGood = goodSample.shape[0]
    # predict() expects (n_samples, n_features); present the sample as one row
    sampleLabel = classifier.predict(np.reshape(sample, (1, -1)))

    numMatch = 0.0
    for i in range(numGood):
        # slice (not index) so each row stays 2-D for predict()
        if classifier.predict(goodSample[i:i + 1, :]) == sampleLabel:
            numMatch += 1

    if plot:
        fig = pl.figure()
        pl.clf()
        ax = Axes3D(fig)
        # Was `est.predict(data)`; no `est` exists in this function.
        labels = classifier.predict(data)
        # builtin float: the np.float alias no longer exists in current NumPy
        ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels.astype(float))
        pl.show()

    return float(numMatch) / numGood
def main(args):
    """Build the confusion matrix between clean and noisy labels and derive data from it.

    Reads `clean_train.txt` and `noisy_train.txt` from `args.data_root`,
    saves the computed matrix as both text and pickle, then passes the
    per-sample noise types to `make_data`.
    """
    root = args.data_root

    files, clean_labels = parse(osp.join(root, 'clean_train.txt'))
    # `files` is deliberately rebound: the list from the noisy file is the
    # one handed to make_data below.
    files, noisy_labels = parse(osp.join(root, 'noisy_train.txt'))

    matrix_c = compute_matrix_c(clean_labels, noisy_labels)
    text_path = osp.join(root, 'matrix_c.txt')
    write_matrix(matrix_c, text_path)
    pickle_path = osp.join(root, 'matrix_c.pkl')
    pickle(matrix_c, pickle_path)

    noise_types = get_noise_types(clean_labels, noisy_labels, matrix_c)
    make_data(files, noise_types, root)
def main(args):
    """Generate a noise matrix for `args.level` and write clean/noisy label lists.

    The matrix is saved as text and pickle under `args.data_root`. Labels
    parsed from `train.txt` are corrupted with the matrix, and the first
    `args.size` entries are written out as clean, noisy, label-only and
    image-only lists.
    """
    size = args.size
    level_tag = repr(args.level)

    def in_root(name):
        # Resolve `name` inside the data directory.
        return osp.join(args.data_root, name)

    q = generate_matrix_q(args.level)
    write_matrix(q, in_root('matrix_q' + level_tag + '.txt'))
    pickle(q, in_root('matrix_q' + level_tag + '.pkl'))

    files, labels = parse(in_root('train.txt'))
    noisy_labels = corrupt(labels, q)

    head_files = files[:size]
    head_noisy = noisy_labels[:size]
    write_file_label_list(head_files, labels[:size], in_root('clean.txt'))
    write_file_label_list(head_files, head_noisy,
                          in_root('noisy_' + level_tag + '.txt'))
    write_list(head_noisy, in_root('labels_noisy_' + level_tag + '.txt'))
    write_list([name + ' -1' for name in head_files], in_root('images.txt'))
def main(args):
    """Convert the pickled training/test batches into the output directory.

    Loads `data_batch_1` .. `data_batch_5` and `test_batch` from
    `args.data_root`, hands the stacked data and labels to `make_data`, and
    writes a 10x10 identity matrix as text and pickle into `args.output_dir`.
    """
    mkdir_if_missing(args.output_dir)

    # training data
    data = []
    labels = []
    # `range` instead of the Python-2-only `xrange`; identical for 5 items
    for i in range(1, 6):
        dic = unpickle(osp.join(args.data_root, 'data_batch_{}'.format(i)))
        data.append(dic['data'])
        labels = np.r_[labels, dic['labels']]
    data = np.vstack(data)
    make_data(data, labels, args.output_dir, 'train')

    # test data
    dic = unpickle(osp.join(args.data_root, 'test_batch'))
    make_data(dic['data'], dic['labels'], args.output_dir, 'test')

    # Identity for confusion initialization
    matrix_I = np.identity(10)
    write_matrix(matrix_I, osp.join(args.output_dir, 'identity.txt'))
    pickle(matrix_I, osp.join(args.output_dir, 'identity.pkl'))
def main(args):
    """Create clean, noisy and mixed training lists from `train.txt`.

    A noise matrix for `args.level` is generated and saved, the parsed
    labels are corrupted with it, and the following files are written under
    `args.data_root`:

    * `clean_train.txt` / `noisy_train.txt`: the first 10000 samples with
      their original / corrupted labels;
    * `mixed_train*.txt`: all samples in one shuffled order, where the first
      10000 keep their original label and the rest carry the corrupted one,
      plus companion lists in which the missing kind of label is -1.
    """
    num_clean = 10000

    q = generate_matrix_q(args.level)
    write_matrix(q, osp.join(args.data_root, 'matrix_q.txt'))
    pickle(q, osp.join(args.data_root, 'matrix_q.pkl'))

    files, labels = parse(osp.join(args.data_root, 'train.txt'))
    noisy_labels = corrupt(labels, q)

    write_file_label_list(files[:num_clean], labels[:num_clean],
                          osp.join(args.data_root, 'clean_train.txt'))
    write_file_label_list(files[:num_clean], noisy_labels[:num_clean],
                          osp.join(args.data_root, 'noisy_train.txt'))

    # Size the padding from the data rather than a fixed 40000, so that
    # datasets with more than 50000 entries are not silently truncated by
    # zip() below. For 50000 entries the result is unchanged.
    num_rest = len(labels) - num_clean
    noisy_as_clean_labels = labels[:num_clean] + noisy_labels[num_clean:]
    noisy_as_none_labels = labels[:num_clean] + [-1] * num_rest
    clean_as_none_labels = [-1] * num_clean + noisy_labels[num_clean:]

    # zip() is lazy on Python 3; shuffle needs a real, mutable sequence.
    merged = list(zip(files, noisy_as_clean_labels, noisy_as_none_labels,
                      clean_as_none_labels))
    np.random.shuffle(merged)
    files, nacl, nanl, canl = zip(*merged)

    write_file_label_list(files, nacl,
                          osp.join(args.data_root, 'mixed_train.txt'))
    write_list([f + ' -1' for f in files],
               osp.join(args.data_root, 'mixed_train_images.txt'))
    write_list(nanl, osp.join(args.data_root, 'mixed_train_label_clean.txt'))
    write_list(canl, osp.join(args.data_root, 'mixed_train_label_noisy.txt'))
def chooseAction(self, observedState): """ Here, choose pacman's next action based on the current state of the game. This is where all the action happens. """ # calculate reward (score delta) for last action current_score = observedState.score last_score = self.score reward = current_score - last_score if self.chatter: print reward # pass reward to learner self.learner.reward_callback(reward) # apply basis function to calculate new state state = self.basis(observedState) # ask learner to plan new state allowed_action_codes = [self.actionCodes[a] for a in self.actionBasis.allowedActions(self, observedState)] action_code = self.learner.action_callback(state,allowed_action_codes) # update score self.score = current_score # update number of actions taken self.action_count += 1 # save results if((self.save_every > 0) and (self.action_count % self.save_every == 0)): if self.chatter: print "Saving..." utils.pickle(self.learner, self.learn_file) # take action if self.chatter: print state, self.actions[action_code], action = self.actionBasis(observedState, self.actions[action_code]) return action
def main(args):
    """Extract detections/features for gallery and probes, then evaluate on rank 0.

    With `args.eval_only`, rank 0 loads previously pickled detections and
    features from the output directory. Otherwise every MPI rank processes
    its share of the gallery images and probes, the results are collected,
    and rank 0 pickles them. In both modes rank 0 finally runs the
    detection and search evaluations.
    """
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # parse gpus; a real list is needed for len() and indexing (map() is
    # lazy on Python 3)
    gpus = [int(gpu) for gpu in args.gpus.split(',')]
    assert len(gpus) >= mpi_size, "Number of GPUs must be >= MPI size"
    cfg.GPU_ID = gpus[mpi_rank]

    # parse feature blob names
    blob_names = args.blob_names.split(',')

    print('Using config:')
    pprint.pprint(cfg)

    while not osp.exists(args.caffemodel) and args.wait:
        print('Waiting for {} to exist...'.format(args.caffemodel))
        time.sleep(10)

    # load imdb
    imdb = get_imdb(args.imdb_name)
    output_dir = get_output_dir(
        imdb.name, osp.splitext(osp.basename(args.caffemodel))[0])

    if args.eval_only:
        def _load(fname):
            fpath = osp.join(output_dir, fname)
            assert osp.isfile(fpath), "Must have extracted detections and " \
                                      "features first before evaluation"
            return unpickle(fpath)

        # only rank 0 evaluates, so only rank 0 needs the cached results
        if mpi_rank == 0:
            gboxes = _load('gallery_detections.pkl')
            gfeatures = _load('gallery_features.pkl')
            pfeatures = _load('probe_features.pkl')
    else:
        # setup caffe
        caffe.mpi_init()
        caffe.set_mode_gpu()
        caffe.set_device(cfg.GPU_ID)

        # 1. Detect and extract features from all the gallery images in the imdb
        start, end = mpi_dispatch(len(imdb.image_index), mpi_size, mpi_rank)
        if args.use_gt:
            net = caffe.Net(args.probe_def, args.caffemodel, caffe.TEST)
            gboxes, gfeatures = usegt_and_exfeat(
                net, imdb, start=start, end=end, blob_names=blob_names)
        else:
            net = caffe.Net(args.gallery_def, args.caffemodel, caffe.TEST)
            gboxes, gfeatures = detect_and_exfeat(
                net, imdb, start=start, end=end, blob_names=blob_names)
        gboxes = mpi_collect(mpi_comm, mpi_rank, gboxes)
        gfeatures = mpi_collect(mpi_comm, mpi_rank, gfeatures)
        del net  # to release the cudnn conv static workspace

        # 2. Only extract features from given probe rois
        start, end = mpi_dispatch(len(imdb.probes), mpi_size, mpi_rank)
        net = caffe.Net(args.probe_def, args.caffemodel, caffe.TEST)
        pfeatures = exfeat(net, imdb.probes, start=start, end=end,
                           blob_names=blob_names)
        pfeatures = mpi_collect(mpi_comm, mpi_rank, pfeatures)
        del net

        # Save
        if mpi_rank == 0:
            pickle(gboxes, osp.join(output_dir, 'gallery_detections.pkl'))
            pickle(gfeatures, osp.join(output_dir, 'gallery_features.pkl'))
            pickle(pfeatures, osp.join(output_dir, 'probe_features.pkl'))

    # Evaluate
    if mpi_rank == 0:
        imdb.evaluate_detections(gboxes, det_thresh=args.det_thresh)
        imdb.evaluate_detections(gboxes, det_thresh=args.det_thresh,
                                 labeled_only=True)
        imdb.evaluate_search(gboxes, gfeatures['feat'], pfeatures['feat'],
                             det_thresh=args.det_thresh,
                             gallery_size=args.gallery_size,
                             dump_json=osp.join(output_dir, 'results.json'))
def write_corpus(fname, corpus, progress_cnt=1000, index=False, num_terms=None, metadata=False):
    """
    Save the vector space representation of an entire corpus to disk.

    Documents are written one at a time, so the whole corpus is allowed to be
    larger than the available RAM.

    Args:
        fname: Path handed to `MmWriter`. When metadata is collected it is
            pickled to `fname + '.metadata.cpickle'`.
        corpus: Iterable of documents. If it has a `metadata` attribute, that
            attribute is set to `metadata` for the duration of the iteration
            and is always restored to its previous value afterwards.
        progress_cnt: A progress message is logged whenever the document
            number is a multiple of this value.
        index: If true, the writer's file position before each document is
            recorded and the list of positions is returned.
        num_terms: Number of terms to put in the header. When falsy, one plus
            the largest id reported by `write_vector` is used.
        metadata: If true (and `corpus` has a `metadata` attribute), every
            item yielded by `corpus` is unpacked as a `(bow, data)` pair.

    Returns:
        The list of offsets when `index` is true, otherwise None.
    """
    mw = MmWriter(fname)
    try:
        # write empty headers to the file (with enough space to be overwritten later)
        mw.write_headers(-1, -1, -1)  # will print 50 spaces followed by newline on the stats line

        # calculate necessary header info (nnz elements, num terms, num docs) while writing out vectors
        _num_terms, num_nnz = 0, 0
        docno, poslast = -1, -1
        offsets = []
        docno2metadata = {}

        has_metadata_attr = hasattr(corpus, 'metadata')
        if has_metadata_attr:
            orig_metadata = corpus.metadata
            corpus.metadata = metadata
        else:
            metadata = False

        try:
            for docno, doc in enumerate(corpus):
                if metadata:
                    bow, data = doc
                    docno2metadata[docno] = data
                else:
                    bow = doc
                if docno % progress_cnt == 0:
                    logger.info("PROGRESS: saving document #%i", docno)
                if index:
                    posnow = mw.fout.tell()
                    if posnow == poslast:
                        # the previous document wrote nothing; flag its offset
                        offsets[-1] = -1
                    offsets.append(posnow)
                    poslast = posnow
                max_id, veclen = mw.write_vector(docno, bow)
                _num_terms = max(_num_terms, 1 + max_id)
                num_nnz += veclen
        finally:
            # Restore the caller's setting even when metadata is False or the
            # iteration fails; previously it was only restored when metadata
            # was requested.
            if has_metadata_attr:
                corpus.metadata = orig_metadata

        if metadata:
            utils.pickle(docno2metadata, fname + '.metadata.cpickle')

        num_docs = docno + 1
        num_terms = num_terms or _num_terms

        if num_docs * num_terms != 0:
            logger.info(
                "saved %ix%i matrix, density=%.3f%% (%i/%i)",
                num_docs, num_terms,
                100.0 * num_nnz / (num_docs * num_terms),
                num_nnz, num_docs * num_terms)

        # now write proper headers, by seeking and overwriting the spaces written earlier
        mw.fake_headers(num_docs, num_terms, num_nnz)
    finally:
        # always release the output file, including on failure
        mw.close()

    if index:
        return offsets
def serialize(self):
    '''Write this recognizer as a pickle into <datadir>/models/.

    The models/ directory is created first when it does not exist yet; the
    file inside it is named after self._modelname.
    '''
    models_dir = self._datadir + '/models/'
    if not os.path.exists(models_dir):
        os.mkdir(models_dir)
    target = models_dir + self._modelname
    pickle(self, target)
def main(args):
    """Write a 2x2 identity matrix as text and pickle into `args.output_dir`."""
    out_dir = args.output_dir
    mkdir_if_missing(out_dir)

    identity = np.identity(2)
    text_path = osp.join(out_dir, 'identity.txt')
    write_matrix(identity, text_path)
    pickle_path = osp.join(out_dir, 'identity.pkl')
    pickle(identity, pickle_path)
def main(args):
    """Save the transposed noise matrix for `args.level` as text and pickle.

    Both files go to `args.data_root` and are named
    `matrix_q<repr(level)>.txt` / `.pkl`.
    """
    q = np.transpose(generate_matrix_q(args.level))

    # common path prefix shared by both output files
    stem = osp.join(args.data_root, 'matrix_q' + repr(args.level))
    write_matrix(q, stem + '.txt')
    pickle(q, stem + '.pkl')
def gt_roidb(self):
    """Return the ground-truth roidb, building and caching it on first use.

    If `<cache_path>/<name>_gt_roidb.pkl` exists it is unpickled and
    returned. Otherwise boxes are read from `annotation/Images.mat`, person
    ids are assigned from the train or test annotation file (depending on
    `self._image_set`), and one entry per image in `self.image_index` is
    built, pickled to the cache file and returned.

    Returns:
        A list of dicts with keys 'boxes', 'gt_classes', 'gt_overlaps',
        'gt_pids' and 'flipped'. Boxes that match no annotated person keep
        pid -1.
    """
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.isfile(cache_file):
        roidb = unpickle(cache_file)
        return roidb

    # Load all images and build a dict from image to boxes
    all_imgs = loadmat(osp.join(self._root_dir, 'annotation', 'Images.mat'))
    all_imgs = all_imgs['Img'].squeeze()
    name_to_boxes = {}
    name_to_pids = {}
    for im_name, __, boxes in all_imgs:
        im_name = str(im_name[0])
        boxes = np.asarray([b[0] for b in boxes[0]])
        boxes = boxes.reshape(boxes.shape[0], 4)
        # keep only boxes whose 3rd and 4th columns are positive
        valid_index = np.where((boxes[:, 2] > 0) & (boxes[:, 3] > 0))[0]
        assert valid_index.size > 0, \
            'Warning: {} has no valid boxes.'.format(im_name)
        boxes = boxes[valid_index]
        name_to_boxes[im_name] = boxes.astype(np.int32)
        name_to_pids[im_name] = -1 * np.ones(boxes.shape[0], dtype=np.int32)

    def _set_box_pid(boxes, box, pids, pid):
        # Label the first row of `boxes` equal to `box`; warn if none matches.
        for i in range(boxes.shape[0]):
            if np.all(boxes[i] == box):
                pids[i] = pid
                return
        print('Warning: person {} box {} cannot find in Images'.format(pid, box))

    # Load all the train / test persons and label their pids from 0 to N-1
    # Assign pid = -1 for unlabeled background people
    if self._image_set == 'train':
        train = loadmat(osp.join(self._root_dir,
                                 'annotation/test/train_test/Train.mat'))
        train = train['Train'].squeeze()
        for index, item in enumerate(train):
            scenes = item[0, 0][2].squeeze()
            for im_name, box, __ in scenes:
                im_name = str(im_name[0])
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)
    else:
        test = loadmat(osp.join(self._root_dir,
                                'annotation/test/train_test/TestG50.mat'))
        test = test['TestG50'].squeeze()
        for index, item in enumerate(test):
            # query
            im_name = str(item['Query'][0, 0][0][0])
            box = item['Query'][0, 0][1].squeeze().astype(np.int32)
            _set_box_pid(name_to_boxes[im_name], box,
                         name_to_pids[im_name], index)
            # gallery
            gallery = item['Gallery'].squeeze()
            for im_name, box, __ in gallery:
                im_name = str(im_name[0])
                # the first empty box ends this person's gallery entries
                if box.size == 0:
                    break
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)

    # Construct the gt_roidb
    gt_roidb = []
    for im_name in self.image_index:
        boxes = name_to_boxes[im_name]
        # presumably converts (x, y, w, h) to (x1, y1, x2, y2) - TODO confirm
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        pids = name_to_pids[im_name]
        num_objs = len(boxes)
        gt_classes = np.ones((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        overlaps[:, 1] = 1.0
        overlaps = csr_matrix(overlaps)
        gt_roidb.append({
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'gt_pids': pids,
            'flipped': False})

    pickle(gt_roidb, cache_file)
    print("wrote gt roidb to {}".format(cache_file))
    return gt_roidb
import os.path as osp
import numpy as np
from argparse import ArgumentParser
from utils import pickle
import caffe


def write_matrix(mat, file_path):
    """Write `mat` to `file_path` as text.

    Each row becomes one line of space-separated `str()` values; no trailing
    newline is written.
    """
    content = [' '.join(map(str, r)) for r in mat]
    with open(file_path, 'w') as f:
        f.write('\n'.join(content))


# Entries of the 2x2 matrix written below.
fp = 0.95
fn = 0.95
tp = 0.05
tn = 0.05

# builtin float: the np.float alias no longer exists in current NumPy
data = np.array([[fn, tn], [tp, fp]], dtype=float)
write_matrix(data, '../data/infogain/Q17.txt')
pickle(data, '../data/infogain/Q17.pkl')

# Pad the shape with leading 1s to four dimensions, matching the
# (num, channels, height, width) fields of a BlobProto.
shape = data.shape
shape = (1, ) * (4 - len(shape)) + shape
data = data.reshape(shape)

blob = caffe.proto.caffe_pb2.BlobProto()
blob.num, blob.channels, blob.height, blob.width = data.shape
blob.data.extend(list(data.ravel().astype(float)))
with open('../data/infogain/Q17.binaryproto', 'wb') as f:
    f.write(blob.SerializeToString())
def gt_roidb(self):
    """Return the ground-truth roidb, building and caching it on first use.

    If `<cache_path>/<name>_gt_roidb.pkl` exists it is unpickled and
    returned. Otherwise boxes are read from `annotation/Images.mat`, person
    ids are assigned from the train or test annotation file (depending on
    `self._image_set`), and one entry per image in `self.image_index` is
    built, pickled to the cache file and returned.

    Returns:
        A list of dicts with keys 'boxes', 'gt_classes', 'gt_overlaps',
        'gt_pids' and 'flipped'. Boxes that match no annotated person keep
        pid -1.
    """
    cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if osp.isfile(cache_file):
        roidb = unpickle(cache_file)
        return roidb

    # Load all images and build a dict from image to boxes
    all_imgs = loadmat(osp.join(self._root_dir, 'annotation', 'Images.mat'))
    all_imgs = all_imgs['Img'].squeeze()
    name_to_boxes = {}
    name_to_pids = {}
    for im_name, __, boxes in all_imgs:
        im_name = str(im_name[0])
        boxes = np.asarray([b[0] for b in boxes[0]])
        boxes = boxes.reshape(boxes.shape[0], 4)
        # keep only boxes whose 3rd and 4th columns are positive
        valid_index = np.where((boxes[:, 2] > 0) & (boxes[:, 3] > 0))[0]
        assert valid_index.size > 0, \
            'Warning: {} has no valid boxes.'.format(im_name)
        boxes = boxes[valid_index]
        name_to_boxes[im_name] = boxes.astype(np.int32)
        name_to_pids[im_name] = -1 * np.ones(boxes.shape[0], dtype=np.int32)

    def _set_box_pid(boxes, box, pids, pid):
        # Label the first row of `boxes` equal to `box`; warn if none matches.
        for i in range(boxes.shape[0]):
            if np.all(boxes[i] == box):
                pids[i] = pid
                return
        print('Warning: person {} box {} cannot find in Images'.format(
            pid, box))

    # Load all the train / test persons and label their pids from 0 to N-1
    # Assign pid = -1 for unlabeled background people
    if self._image_set == 'train':
        train = loadmat(
            osp.join(self._root_dir, 'annotation/test/train_test/Train.mat'))
        train = train['Train'].squeeze()
        for index, item in enumerate(train):
            scenes = item[0, 0][2].squeeze()
            for im_name, box, __ in scenes:
                im_name = str(im_name[0])
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)
    else:
        test = loadmat(
            osp.join(self._root_dir, 'annotation/test/train_test/TestG50.mat'))
        test = test['TestG50'].squeeze()
        for index, item in enumerate(test):
            # query
            im_name = str(item['Query'][0, 0][0][0])
            box = item['Query'][0, 0][1].squeeze().astype(np.int32)
            _set_box_pid(name_to_boxes[im_name], box,
                         name_to_pids[im_name], index)
            # gallery
            gallery = item['Gallery'].squeeze()
            for im_name, box, __ in gallery:
                im_name = str(im_name[0])
                # the first empty box ends this person's gallery entries
                if box.size == 0:
                    break
                box = box.squeeze().astype(np.int32)
                _set_box_pid(name_to_boxes[im_name], box,
                             name_to_pids[im_name], index)

    # Construct the gt_roidb
    gt_roidb = []
    for im_name in self.image_index:
        boxes = name_to_boxes[im_name]
        # presumably converts (x, y, w, h) to (x1, y1, x2, y2) - TODO confirm
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        pids = name_to_pids[im_name]
        num_objs = len(boxes)
        gt_classes = np.ones((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        overlaps[:, 1] = 1.0
        overlaps = csr_matrix(overlaps)
        gt_roidb.append({
            'boxes': boxes,
            'gt_classes': gt_classes,
            'gt_overlaps': overlaps,
            'gt_pids': pids,
            'flipped': False
        })

    pickle(gt_roidb, cache_file)
    print("wrote gt roidb to {}".format(cache_file))
    return gt_roidb
def _add_to_tfrecord(filename, tfrecord_writer, offset=0):
    """Loads data from a CIFAR-100 style pickle file and writes it to a TFRecord.

    The pickle is expected to provide `data`, `fine_labels` and
    `coarse_labels`. Besides writing the images, two helper mappings are
    pickled under `utils.root_path + '/data/cifar100/'`:

    * `c2f_map.pkl`: coarse label -> set of fine labels seen with it;
    * `b2a_map.pkl`: fine label -> running index, assigned while walking
      the coarse groups of `c2f_map`.

    Each image is written with its original fine label.

    Args:
      filename: The filename of the pickle file.
      tfrecord_writer: The TFRecord writer to use for writing.
      offset: An offset into the absolute number of images previously written.

    Returns:
      The new offset.
    """
    # NOTE: unpickling executes arbitrary code; only use trusted dataset files.
    with tf.gfile.Open(filename, 'rb') as f:
        if sys.version_info < (3,):
            data = cPickle.load(f)
        else:
            data = cPickle.load(f, encoding='bytes')

    images = data[b'data']
    num_images = images.shape[0]
    images = images.reshape((num_images, 3, 32, 32))
    labels = data[b'fine_labels']
    coarse_labels = data[b'coarse_labels']

    # coarse label -> set of fine labels
    c2f_map = {}
    for lb, cl in zip(labels, coarse_labels):
        c2f_map.setdefault(cl, set()).add(lb)
    utils.pickle(c2f_map, utils.root_path + '/data/cifar100/c2f_map.pkl')

    # fine label -> consecutive index, grouped by coarse label
    b2a_map = {}
    ind = 0
    for fine_labels in c2f_map.values():
        for fine in fine_labels:
            b2a_map[fine] = ind
            ind += 1
    utils.pickle(b2a_map, utils.root_path + '/data/cifar100/b2a_map.pkl')

    with tf.Graph().as_default():
        image_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(image_placeholder)

        with tf.Session() as sess:
            for j in range(num_images):
                sys.stdout.write('\r>> Reading file [%s] image %d/%d' % (
                    filename, offset + j + 1, offset + num_images))
                sys.stdout.flush()

                # (3, 32, 32) -> (32, 32, 3) for PNG encoding
                image = np.squeeze(images[j]).transpose((1, 2, 0))
                lb = labels[j]

                png_string = sess.run(encoded_image,
                                      feed_dict={image_placeholder: image})

                example = dataset_utils.image_to_tfexample(
                    png_string, b'png', _IMAGE_SIZE, _IMAGE_SIZE, lb)
                tfrecord_writer.write(example.SerializeToString())

    return offset + num_images