Example #1
0
            os.makedirs(FLAGS.feature_dir)
        except OSError:
            pass

    def read_image(self, name):
        """Load the image stored at ``name`` and return its manipulated form.

        The image is read with ``datasets.imread_rgb`` and the result is
        handed to ``datasets.manipulate`` with ``CENTER_CROP`` as the final
        argument; the three arguments in between are left as ``None``.
        """
        rgb_image = datasets.imread_rgb(name)
        manipulated = datasets.manipulate(rgb_image, None, None, None,
                                          CENTER_CROP)
        return manipulated

    def map(self, key, value):
        """Compute the feature of one image and save it to the feature dir.

        Input:
            key: dummy, not used
            value: the image filename, joined onto FLAGS.image_dir to
                locate the image
        Yields:
            a single (self._hostname, imagename) pair, where imagename is
            the basename of value
        """
        imagename = os.path.basename(value)
        source_path = os.path.join(FLAGS.image_dir, value)
        feature = self._conv.process(self.read_image(source_path),
                                     convbuffer=self._buffer)
        # The feature is stored under the image's basename inside
        # FLAGS.feature_dir.
        target_path = os.path.join(FLAGS.feature_dir, imagename)
        np.save(target_path, feature)
        yield self._hostname, imagename


# Register FeatureExtractionMapper as the default mapper for this script.
mapreducer.REGISTER_DEFAULT_MAPPER(FeatureExtractionMapper)

# For the reduce step, we simply use the identity reducer.
mapreducer.REGISTER_DEFAULT_REDUCER(mapreducer.IdentityReducer)
# Default reader and writer classes for this script.
mapreducer.REGISTER_DEFAULT_READER(mapreducer.FileReader)
mapreducer.REGISTER_DEFAULT_WRITER(mapreducer.PickleWriter)

# Only start the launcher when this file is executed as a script.
if __name__ == "__main__":
    launcher.launch()
Example #2
0
        files = [value]
        image_lib = FLAGS.image_lib.lower()
        if image_lib == 'pil':
            resize_crop = PILResizeCrop()
        else:
            resize_crop = OpenCVResizeCrop()
        for i, line in enumerate(files):
            try:
                line = line.replace(FLAGS.input_folder, '').strip()
                line = line.split()
                image_file_name = line[0]
                input_file = os.path.join(FLAGS.input_folder, image_file_name)
                output_file = os.path.join(FLAGS.output_folder, image_file_name)
                output_dir = output_file[:output_file.rfind('/')]
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                feat = resize_crop.resize_and_crop_image(input_file, output_file,
                                                              FLAGS.output_side_length)
            except Exception as e:
                # we ignore the exception (maybe the image is corrupted?)
                print(line, Exception, e)
        yield value, FLAGS.output_folder

# Register the default mapper, reducer, reader and writer for this script.
mapreducer.REGISTER_DEFAULT_MAPPER(ResizeCropImagesMapper)
mapreducer.REGISTER_DEFAULT_REDUCER(mapreducer.NoPassReducer)
mapreducer.REGISTER_DEFAULT_READER(mapreducer.FileReader)
mapreducer.REGISTER_DEFAULT_WRITER(mapreducer.FileWriter)
 
# Only start the launcher when this file is executed as a script.
if __name__ == '__main__':
    launcher.launch()
Example #3
0
                                        dtype=FEATURE_DTYPE)
                features[i * 10:(i + 1) * 10] = feat
            except IOError:
                # we ignore the exception (maybe the image is corrupted or
                # pygist has some bugs?)
                print f, Exception, e
        outname = str(uuid.uuid4()) + '.npy'
        try:
            os.makedirs(FLAGS.output_folder)
        except OSError:
            pass
        np.save(os.path.join(FLAGS.output_folder, outname), features)
        yield value, outname


# Register DecafnetMapper as the default mapper for this script.
mapreducer.REGISTER_DEFAULT_MAPPER(DecafnetMapper)


class DecafnetReducer(mapreducer.BasicReducer):
    """Reducer that gives a temporary numpy file its final name."""

    def reduce(self, key, values):
        """Rename the temporary numpy file to ``<key>.npy``.

        Input:
            key: the synset name
            values: the temporary names from map; only the first entry is
                renamed
        Returns:
            key, unchanged
        """
        folder = FLAGS.output_folder
        temp_path = os.path.join(folder, values[0])
        final_path = os.path.join(folder, key + '.npy')
        os.rename(temp_path, final_path)
        return key


# Register DecafnetReducer as the default reducer for this script.
mapreducer.REGISTER_DEFAULT_REDUCER(DecafnetReducer)
Example #4
0
        files.sort()
        features = np.zeros((len(files), GIST_DIM), dtype=GIST_DTYPE)
        for i, f in enumerate(files):
            try:
                feat = process_image(f)
                features[i] = feat
            except Exception, e:
                # we ignore the exception (maybe the image is corrupted or
                # pygist has some bugs?)
                print f, Exception, e
        outname = str(uuid.uuid4()) + '.npy'
        np.save(os.path.join(FLAGS.output_folder, outname), features)
        yield value, outname


# Register PygistMapper as the default mapper for this script.
mapreducer.REGISTER_DEFAULT_MAPPER(PygistMapper)


class PygistReducer(mapreducer.BasicReducer):
    """Reducer that moves a temporary numpy file to its synset-named path."""

    def reduce(self, key, values):
        """Rename the first temporary file in ``values`` after ``key``.

        Input:
            key: the synset name; the file ends up as key + '.npy'
            values: the temporary names from map, of which only values[0]
                is used
        Returns:
            the key that was passed in
        """
        src = os.path.join(FLAGS.output_folder, values[0])
        dst = os.path.join(FLAGS.output_folder, key + '.npy')
        os.rename(src, dst)
        return key


# Register PygistReducer as the default reducer for this script.
mapreducer.REGISTER_DEFAULT_REDUCER(PygistReducer)
Example #5
0
# value is a list of counts to be summed up.

# Optionally, we register the mapper and reducer as defaults so that they do
# not need to be specified in the command-line arguments.


class WordCountMapper(mapreducer.BasicMapper):
    """Mapper that emits a count of one for every word in a text file."""

    def map(self, key, value):
        """Yield ``(word, 1)`` for each whitespace-separated word.

        key is not used; value is the path of the file to read.
        """
        with open(value, 'r') as text_file:
            for text_line in text_file:
                tokens = text_line.split()
                for token in tokens:
                    yield token, 1


# Register WordCountMapper as the default mapper for this script.
mapreducer.REGISTER_DEFAULT_MAPPER(WordCountMapper)


class WordCountReducer(mapreducer.BasicReducer):
    """Reducer that adds up the counts collected for one word."""

    def reduce(self, key, value):
        """Return the sum of the counts in ``value``; ``key`` is not used."""
        total = sum(value)
        return total


# Register WordCountReducer as the default reducer for this script.
mapreducer.REGISTER_DEFAULT_REDUCER(WordCountReducer)

# (3) Finally, the main entry: simply call launcher.launch() to start
# everything. The guard keeps this from running when the file is imported.

if __name__ == "__main__":
    launcher.launch()
        return all

    def map(self, key, value):
        """Count the words of one randomly downloaded article.

        Every call fetches an article through
        ``WikipediaMapper.get_random_wikipedia_article`` and tallies its
        whitespace-separated words locally, so each distinct word is
        emitted once with its in-document count instead of as repeated 1
        counts, which helps the reducer a little bit. Neither ``key`` nor
        ``value`` is used.
        """
        article = WikipediaMapper.get_random_wikipedia_article()
        tally = defaultdict(int)
        for token in article.split():
            tally[token] += 1
        for token, occurrences in tally.items():
            yield token, occurrences


# Register WikipediaMapper as the default mapper for this script.
mapreducer.REGISTER_DEFAULT_MAPPER(WikipediaMapper)


class WordCountReducer(mapreducer.BasicReducer):
    """Reducer that totals the per-document counts of a single word."""

    def reduce(self, key, value):
        """Sum the counts given in ``value`` and return the total.

        ``key`` is accepted but not used.
        """
        word_total = sum(value)
        return word_total


# Register WordCountReducer as the default reducer for this script.
mapreducer.REGISTER_DEFAULT_REDUCER(WordCountReducer)

# We need IterateReader instead of the default reader.
mapreducer.REGISTER_DEFAULT_READER(mapreducer.IterateReader)

# Only start the launcher when this file is executed as a script.
if __name__ == "__main__":
    launcher.launch()