import os
import sys

import numpy as np

# Project-level helpers used below (INFO, BigFile, printStatus, checkToSkip,
# makedirsforfile, readImageSet, readConcepts, readAnnotationsFrom,
# writeAnnotationsTo, writeConceptsTo) are assumed to be imported from this
# codebase's utility modules.


def process(options, collection, features):
    rootpath = options.rootpath
    blocksize = options.blocksize
    newfeature = options.newfeature

    src_features = features.split(',')
    if not newfeature:
        newfeature = '+'.join(src_features)

    new_feat_dir = os.path.join(rootpath, collection, 'FeatureData', newfeature)
    new_feat_file = os.path.join(new_feat_dir, 'feature.bin')
    if checkToSkip(new_feat_file, options.overwrite):
        return 0

    imset = readImageSet(collection, collection, rootpath)
    # Number of blocks needed to cover the whole image set (integer division,
    # rounded up).
    nr_to_read = len(imset) // blocksize
    if blocksize * nr_to_read < len(imset):
        nr_to_read += 1

    src_feat_files = [BigFile(os.path.join(rootpath, collection, 'FeatureData', feature))
                      for feature in src_features]
    nr_fea = len(src_feat_files)
    src_feat_dims = [x.ndims for x in src_feat_files]
    new_feat_dim = sum(src_feat_dims)
    printStatus(INFO, '%s -> %s,%d' % (
        ' '.join(['(%s,%d)' % (x[0], x[1]) for x in zip(src_features, src_feat_dims)]),
        newfeature, new_feat_dim))

    makedirsforfile(new_feat_file)
    fw = open(new_feat_file, 'wb')
    id_images = []

    for t in range(nr_to_read):
        start = t * blocksize
        end = min(len(imset), start + blocksize)
        printStatus(INFO, 'processing images from %d to %d' % (start, end - 1))

        todo = imset[start:end]
        nr_images = len(todo)
        if nr_images == 0:
            break

        # Map image id -> position within this block, so that features returned
        # in arbitrary order can be realigned across the source feature files.
        mapping = dict(zip(todo, range(nr_images)))
        renamed = [None] * nr_fea
        vectors = [None] * nr_fea

        for i in range(nr_fea):
            tmp_names, tmp_vecs = src_feat_files[i].read(todo)
            assert len(tmp_names) == nr_images
            renamed[i] = [None] * nr_images
            vectors[i] = [None] * nr_images
            for name, vec in zip(tmp_names, tmp_vecs):
                j = mapping[name]
                renamed[i][j] = name
                vectors[i][j] = vec

        for j in range(nr_images):
            # Concatenate the per-feature vectors of the j-th image; copy the
            # first vector so the cached block is not extended in place.
            new_vec = list(vectors[0][j])
            for i in range(1, nr_fea):
                assert renamed[i - 1][j] == renamed[i][j], \
                    '%s %s' % (renamed[i - 1][j], renamed[i][j])
                new_vec += vectors[i][j]
            assert len(new_vec) == new_feat_dim
            name = renamed[0][j]
            np.array(new_vec, dtype=np.float32).tofile(fw)
            id_images.append(name)

    fw.close()

    new_id_file = os.path.join(new_feat_dir, 'id.txt')
    fw = open(new_id_file, 'w')
    fw.write(' '.join(id_images))
    fw.close()

    fw = open(os.path.join(new_feat_dir, 'shape.txt'), 'w')
    fw.write('%d %d' % (len(id_images), new_feat_dim))
    fw.close()
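
# A minimal sketch of a command-line wrapper for process(), assuming the
# optparse-style entry point commonly paired with scripts of this kind; the
# usage string and default values below are illustrative assumptions, not the
# original main(). The option names mirror the attributes process() reads:
# rootpath, blocksize, newfeature and overwrite.
def main(argv=None):
    from optparse import OptionParser
    parser = OptionParser(usage='usage: %prog [options] collection feature1,feature2,...')
    parser.add_option('--rootpath', type='string', default='/var/scratch2/xirong/VisualSearch',
                      help='root path of the collections (assumed default)')
    parser.add_option('--blocksize', type='int', default=1000,
                      help='number of images to read per block (assumed default)')
    parser.add_option('--newfeature', type='string', default=None,
                      help='name of the concatenated feature; defaults to feature1+feature2+...')
    parser.add_option('--overwrite', type='int', default=0,
                      help='overwrite an existing feature.bin (0 or 1)')
    (options, args) = parser.parse_args(argv)
    if len(args) < 2:
        parser.print_help()
        return 1
    return process(options, args[0], args[1])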
if __name__ == '__main__':
    args = sys.argv[1:]
    rootpath = '/var/scratch2/xirong/VisualSearch'
    srcCollection = args[0]
    annotationName = args[1]
    dstCollection = args[2]
    overwrite = 0

    concepts = readConcepts(srcCollection, annotationName, rootpath)

    # Collect the concepts whose annotation files are still missing in the
    # destination collection.
    todo = []
    for concept in concepts:
        resfile = os.path.join(rootpath, dstCollection, 'Annotations', 'Image',
                               annotationName, '%s.txt' % concept)
        if checkToSkip(resfile, overwrite):
            continue
        todo.append(concept)

    if not todo:
        print('nothing to do')
        sys.exit(0)

    imset = set(readImageSet(dstCollection, dstCollection, rootpath))

    for concept in todo:
        names, labels = readAnnotationsFrom(srcCollection, annotationName, concept,
                                            rootpath=rootpath)
        # Keep only the annotated images that exist in the destination collection.
        selected = [x for x in zip(names, labels) if x[0] in imset]
        print('%s %d' % (concept, len(selected)))
        writeAnnotationsTo([x[0] for x in selected], [x[1] for x in selected],
                           dstCollection, annotationName, concept, rootpath=rootpath)

    writeConceptsTo(concepts, dstCollection, annotationName, rootpath)
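
# Example invocation of the annotation-copying script above (the script name
# and the three positional arguments are hypothetical placeholders):
#   python copy_annotations.py srcCollection annotationName dstCollection
# Each concept's annotations are copied from srcCollection to dstCollection,
# restricted to the images that actually occur in dstCollection's image set.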