Exemple #1
0
	def pull_avitar(self):
		"""Download the profile image and store it, plus its pypuzzle vector, as assets.

		An asset entry named "<md5 of profile_image_url>_<timestamp>.png" is
		requested through ``self.addAsset``; the image is then fetched with
		``wget`` into that path under ANNEX_DIR and its pypuzzle vector is
		stored as a second asset.

		Returns:
			True when the image vector was computed and handed to
			``self.addAsset``; False when the image asset could not be
			created (``addAsset`` returned None) or when computing/storing
			the vector raised.
		"""
		# shlex.quote on Python 3, pipes.quote on Python 2.
		try:
			from shlex import quote
		except ImportError:
			from pipes import quote

		print(self.emit())

		t = time()
		avi = self.addAsset(
			None,
			"%s_%d.png" % (generateMD5Hash(content=self.profile_image_url), t),
			description="user's avitar at %d" % t,
			tags=[ASSET_TAGS['FD_AVI']])

		if avi is None:
			return False

		avi_path = os.path.join(ANNEX_DIR, avi)

		# The URL comes from remote profile data: quote both arguments so that
		# characters such as '&', ';' or spaces cannot truncate the command or
		# inject additional shell commands.
		with settings(warn_only=True):
			local("wget -O %s %s" % (quote(avi_path), quote(self.profile_image_url)))

		import pypuzzle
		puzz = pypuzzle.Puzzle()

		# A failed download is not checked explicitly; it surfaces here as an
		# exception from pypuzzle and results in a False return.
		try:
			cvec = puzz.get_cvec_from_file(avi_path)
			self.addAsset(cvec, "avitar_image_cvec_%d.json" % t, as_literal=False, tags=[ASSET_TAGS['IMAGE_CVEC']])
			return True
		except Exception as e:
			if DEBUG:
				print("Could not get image vector because %s" % e)

		return False
    def visually_dedupe_emotes(self):
        """Merge emotes whose images are visually identical and drop the copies.

        Emotes are visited subreddit by subreddit (in ``self.subreddits``
        order). Each non-animated emote's image is fingerprinted with
        pypuzzle and compared against every emote fingerprinted so far,
        across all subreddits. When the distance is not greater than zero the
        current emote is merged into the earlier one via
        ``self._merge_emotes`` and recorded as a duplicate. Finally
        ``self.emotes`` is rebuilt without the duplicates.

        Note: an emote matching several earlier emotes is merged into each of
        them (the comparison loop does not stop at the first match).
        """
        logger.info('Beginning visually_dedupe_emotes()')
        # (emote, compressed vector) pairs for every emote fingerprinted so far.
        processed_emotes = []
        duplicates = []
        puzzle = pypuzzle.Puzzle()
        # Some images like 'minigunkill' got a generic vector (a vector
        # consisting of only zeros). These images were merged with other
        # images that also got a generic vector.
        # Setting the noise cutoff fixed this.
        puzzle.set_noise_cutoff(0)

        for subreddit in self.subreddits:
            subreddit_emotes = [x for x in self.emotes if x['sr'] == subreddit]

            logger.info('Visually dedupeing emotes in subreddit %s', subreddit)
            for emote in subreddit_emotes:

                if emote in duplicates:
                    continue

                # Ignore animations as they sometimes start with a blank
                # (transparent) frame. Only the first frame is checked, so
                # they would look the same as any other blank picture.
                if emote['base_img_animation'] or (
                        has_hover(emote) and emote['hover_img_animation']):
                    continue

                image_path = get_single_image_path(self.output_dir, emote)
                logger.debug('puzzle.get_cvec_from_file(%s)', image_path)
                vector = puzzle.get_cvec_from_file(image_path)

                for other_emote, other_compressed_vector in processed_emotes:
                    # Skip known duplicates *before* uncompressing their
                    # vector, so no work is spent on entries that are ignored.
                    if other_emote in duplicates:
                        continue

                    other_vector = puzzle.uncompress_cvec(
                        other_compressed_vector)
                    distance = puzzle.get_distance_from_cvec(
                        vector, other_vector)

                    # A distance that is not greater than zero means the
                    # images are equal: merge them.
                    if not distance > 0:
                        self._merge_emotes(other_emote, emote)
                        duplicates.append(emote)

                # Vectors are kept compressed while stored and are only
                # uncompressed for the comparison above.
                processed_emotes.append((emote, puzzle.compress_cvec(vector)))

        self.emotes = [
            emote for emote in self.emotes if emote not in duplicates
        ]
Exemple #3
0
def main():
    """Move near-duplicate dataset images (and their labels) aside.

    Command line arguments:
        -i / --dataset_path: dataset root containing ``images/`` and
            ``labels/`` (required).
        -t / --threshold: reference pypuzzle distance (default 0.2).

    Every pair of images is compared by pypuzzle distance. For a matching
    pair the second image is moved to ``<dataset>/duplicate/``, the label
    file of the same name is moved there with a ``label_`` prefix, and a line
    is appended to ``<dataset>/duplicate/duplicates.log``.

    Exits with status -1 when ``utils.valid_dataset`` rejects the dataset.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--dataset_path", type=str, required=True)
    parser.add_argument("-t",
                        "--threshold",
                        type=float,
                        required=False,
                        default=0.2)
    args = parser.parse_args()

    features_path = args.dataset_path + "/images/"
    labels_path = args.dataset_path + "/labels/"
    duplicates_path = args.dataset_path + "/duplicate/"

    if not utils.valid_dataset(args.dataset_path):
        print("Invalid dataset")
        sys.exit(-1)

    utils.make_dirs([duplicates_path])

    images = utils.collect_images(features_path)
    vectors = collect_vectors(images)

    # The context manager guarantees the log is flushed and closed even if a
    # comparison or a file move raises part-way through.
    log_path = os.path.join(duplicates_path, "duplicates.log")
    with open(log_path, "w") as duplicate_log:
        puzzle = pypuzzle.Puzzle()
        for i in range(len(vectors) - 1):
            for j in range(i + 1, len(vectors)):
                # Images moved by an earlier match no longer exist at their
                # original path and are skipped here.
                if not utils.exists_paths([images[i], images[j]]):
                    continue

                # NOTE(review): this matches pairs whose distance lies within
                # 0.01 of --threshold, not pairs whose distance is below it
                # (so identical images at distance 0 do not match with the
                # default 0.2). Confirm whether `distance <= threshold` was
                # intended before changing; behaviour is kept as-is.
                threshold = abs(
                    puzzle.get_distance_from_cvec(vectors[i], vectors[j]) -
                    args.threshold)
                if threshold <= 0.01:
                    image_name = os.path.basename(images[j])
                    shutil.move(images[j], duplicates_path + image_name)
                    shutil.move(labels_path + image_name,
                                duplicates_path + "label_" + image_name)
                    duplicate_log.write("Duplicate " + str(images[i]) + " " +
                                        str(images[j]) + " threshold " +
                                        str(threshold) + '\n')
Exemple #4
0
def get_image_vector(uv_task):
    """Compute the pypuzzle vector of the task's document and store it.

    The document identified by ``uv_task.doc_id`` is loaded, the vector of
    its file under ANNEX_DIR is computed, and the result is saved on the
    document as the "image_cvec.json" asset. The task is marked failed when
    the vector cannot be computed or the asset cannot be saved, and finished
    otherwise.
    """
    task_tag = "AVI: GETTING IMAGE VECTOR"

    def _abort(error_msg):
        # Shared failure path: report, print the error banner, fail the task.
        print(error_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(message=error_msg)

    print("\n\n************** %s [START] ******************\n" % task_tag)
    uv_task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument
    from conf import ANNEX_DIR
    import pypuzzle

    image = UnveillanceDocument(_id=uv_task.doc_id)
    puzz = pypuzzle.Puzzle()

    try:
        image_path = os.path.join(ANNEX_DIR, image.file_name)
        cvec = puzz.get_cvec_from_file(image_path)
    except Exception as e:
        _abort("Could not get image vector because %s" % e)
        return

    from vars import ASSET_TAGS

    saved = image.addAsset(cvec,
                           "image_cvec.json",
                           as_literal=False,
                           tags=[ASSET_TAGS['IMAGE_CVEC']])
    if saved:
        print("\n\n************** %s [END] ******************\n" % task_tag)
        uv_task.finish()
        return

    _abort("could not save cvec asset!")
Exemple #5
0
def collect_vectors(images):
    """Return the pypuzzle vector of each image path, in input order."""
    extract_vector = pypuzzle.Puzzle().get_cvec_from_file
    vectors = []
    for image_path in images:
        vectors.append(extract_vector(image_path))
    return vectors
Exemple #6
0
 def setUp(self):
    """Give each test its own fresh pypuzzle.Puzzle instance."""
    fresh_puzzle = pypuzzle.Puzzle()
    self.puzzle = fresh_puzzle
Exemple #7
0
def compare_avis(uv_task):
    """Compare the stored image vectors of exactly two avatar documents.

    ``uv_task.avis`` must hold exactly two document ids. Each document's
    "image_cvec.json" asset is loaded and the pypuzzle distance between the
    two vectors is saved on the task as "compare_avi_output.json", together
    with the file name and id of both documents.

    The task is failed with status 412 when the input is not two ids, when
    the documents or their vectors cannot be loaded, or when the distance is
    not an int/float. It is failed (without an explicit status) when the
    result asset cannot be saved. Otherwise the task is finished.
    """
    task_tag = "CLUSTER: COMPARING 2 AVIS"
    print("\n\n************** %s [START] ******************\n" % task_tag)

    uv_task.setStatus(302)

    def _fail(error_msg, **fail_kwargs):
        # Shared failure path: report, print the error banner, fail the task.
        print(error_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(message=error_msg, **fail_kwargs)

    # The length is compared to 2 (previously len() was applied to the result
    # of the comparison itself, which raised TypeError for every list).
    if not hasattr(uv_task, 'avis') or len(uv_task.avis) != 2:
        _fail("Cannot compare anything.", status=412)
        return

    from lib.Worker.Models.uv_document import UnveillanceDocument

    # List comprehensions (rather than map) build the documents eagerly, so a
    # construction error is raised inside this try block.
    try:
        avis = [UnveillanceDocument(_id=a) for a in uv_task.avis]
    except Exception as e:
        _fail("could not load up avis as UnveillanceDocuments: %s" % e,
              status=412)
        return

    from conf import ANNEX_DIR
    from vars import ASSET_TAGS

    from json import loads
    import pypuzzle

    puzz = pypuzzle.Puzzle()

    try:
        vectors = [loads(a.loadAsset("image_cvec.json")) for a in avis]
        compare_avi = puzz.get_distance_from_cvec(*vectors)
    except Exception as e:
        _fail("could not get one or more image vectors because %s" % e,
              status=412)
        return

    if type(compare_avi) not in [int, float]:
        _fail("non-numerical result for comparaison.", status=412)
        return

    c_map = {
        'avis': [{
            'file_name': a.file_name,
            '_id': a._id
        } for a in avis],
        'compared': compare_avi
    }

    if not uv_task.addAsset(c_map,
                            "compare_avi_output.json",
                            as_literal=False,
                            tags=[ASSET_TAGS['C_RES']]):
        _fail("could not save result asset to this task.")
        return

    print("\n\n************** %s [END] ******************\n" % task_tag)
    uv_task.finish()
Exemple #8
0
def matchImg(eva, evb):
    """Return the pypuzzle distance between two serialized image vectors.

    Args:
        eva: first vector as a string of integers separated by ":"
            (the format returned by ImgCVEC).
        evb: second vector, same format.

    Returns:
        The value of ``Puzzle.get_distance_from_cvec`` for the two
        uncompressed vectors.

    Raises:
        ValueError: if a ":"-separated field is not an integer.
    """
    puzzle = pypuzzle.Puzzle()
    eva_vec = puzzle.uncompress_cvec(tuple(map(int, eva.split(":"))))
    evb_vec = puzzle.uncompress_cvec(tuple(map(int, evb.split(":"))))
    return puzzle.get_distance_from_cvec(eva_vec, evb_vec)
Exemple #9
0
def ImgCVEC(path):
    """Return the compressed pypuzzle vector of an image as a string.

    Args:
        path: path of the image file to fingerprint.

    Returns:
        The elements of the compressed vector converted with ``str`` and
        joined by ":" (the format matchImg parses).
    """
    puzzle = pypuzzle.Puzzle()
    vec = puzzle.get_cvec_from_file(path)
    cmp_vec = puzzle.compress_cvec(vec)
    return ':'.join(map(str, cmp_vec))