def rawHashFile(archPath):
    """Hash every member of the archive at ``archPath`` and print the results.

    Each member is read fully, handed to ``hasher.hashFile`` together with the
    archive path and member name, and a dict with the returned name, item hash
    and pHash is printed (name and pHash right-justified to 25 columns).

    Args:
        archPath: path passed to ``uar.ArchiveReader`` and ``hasher.hashFile``.

    Raises:
        ValueError: if the archive lists the same member name more than once.
        Any exception raised while reading or hashing a member is re-raised
        after ``archPath`` has been printed.
    """
    archIterator = uar.ArchiveReader(archPath)

    try:
        # First pass: collect member names only, to detect duplicates.
        # NOTE(review): the reader is iterated a second time below; this
        # assumes ArchiveReader can be iterated more than once - confirm.
        fnames = [item[0] for item in archIterator]
        fset = set(fnames)
        if len(fnames) != len(fset):
            print(fnames)
            print(fset)
            raise ValueError("Wat?")

        try:
            for fName, fp in archIterator:
                fCont = fp.read()
                # The image dimensions returned by hashFile are not printed.
                fName, hexHash, pHash, _imX, _imY = hasher.hashFile(
                    archPath, fName, fCont)

                insertArgs = {
                    "internalPath": fName.rjust(25),
                    "itemHash": hexHash,
                    "pHash": str(pHash).rjust(25),
                }

                print(insertArgs)
        except BaseException:
            # Same reach as the former bare ``except:``; identify the archive
            # that failed, then let the error propagate unchanged.
            print(archPath)
            raise
    finally:
        # Release the reader on every path, including the duplicate-name error.
        archIterator.close()
	def test_hashImage2_c(self):
		# Hashes the 'Lolcat_this_is_mah_job_small.jpg' fixture and checks every
		# value returned by hashFile.hashFile against recorded constants.
		testDir = os.path.realpath(__file__)
		testDir = os.path.dirname(testDir)
		imagePath = os.path.join(testDir, 'testimages', 'Lolcat_this_is_mah_job_small.jpg')

		with open(imagePath, "rb") as imageFile:
			imageBytes = imageFile.read()

		containerName = "LOL"
		memberName = "WAT.jpg"
		result = hashFile.hashFile(containerName, memberName, imageBytes)
		gotName, gotHex, gotPhash, gotX, gotY = result

		# The internal name must be handed back unchanged.
		self.assertEqual(memberName, gotName)

		# The pHash constant is the same one asserted for the full-size
		# 'Lolcat_this_is_mah_job' fixtures in test_hashImage2 / test_hashImage2_b.
		self.assertEqual(gotHex, "40d39c436e14282dcda06e8aff367307")
		self.assertEqual(gotPhash, -4992890192511777340)
		self.assertEqual(gotX, 300)
		self.assertEqual(gotY, 237)
	def test_hashImage3(self):
		# Hashes the 'lolcat-crocs.jpg' fixture and checks every value returned
		# by hashFile.hashFile against recorded constants.
		testDir = os.path.realpath(__file__)
		testDir = os.path.dirname(testDir)
		imagePath = os.path.join(testDir, 'testimages', 'lolcat-crocs.jpg')

		with open(imagePath, "rb") as imageFile:
			imageBytes = imageFile.read()

		containerName = "LOL"
		memberName = "WAT.jpg"
		result = hashFile.hashFile(containerName, memberName, imageBytes)
		gotName, gotHex, gotPhash, gotX, gotY = result

		# The internal name must be handed back unchanged.
		self.assertEqual(memberName, gotName)

		self.assertEqual(gotHex, "6d0a977694630ac9d1d33a7f068e10f8")
		self.assertEqual(gotPhash, -7472365462264617431)
		self.assertEqual(gotX, 500)
		self.assertEqual(gotY, 363)
	def test_hashImage2(self):
		# Hashes the 'Lolcat_this_is_mah_job.jpg' fixture and checks every value
		# returned by hashFile.hashFile against recorded constants.
		testDir = os.path.realpath(__file__)
		testDir = os.path.dirname(testDir)
		imagePath = os.path.join(testDir, 'testimages', 'Lolcat_this_is_mah_job.jpg')

		with open(imagePath, "rb") as imageFile:
			imageBytes = imageFile.read()

		containerName = "LOL"
		memberName = "WAT.jpg"
		result = hashFile.hashFile(containerName, memberName, imageBytes)
		gotName, gotHex, gotPhash, gotX, gotY = result

		# The internal name must be handed back unchanged.
		self.assertEqual(memberName, gotName)

		self.assertEqual(gotHex, "d9ceeb6b43c2d7d096532eabfa6cf482")
		self.assertEqual(gotPhash, -4992890192511777340)
		self.assertEqual(gotX, 493)
		self.assertEqual(gotY, 389)
	def test_hashImage2_b(self):
		# Hashes the 'Lolcat_this_is_mah_job.png' fixture and checks every value
		# returned by hashFile.hashFile against recorded constants. The bytes
		# come from a .png file while the internal name passed in ends in .jpg.
		testDir = os.path.realpath(__file__)
		testDir = os.path.dirname(testDir)
		imagePath = os.path.join(testDir, 'testimages', 'Lolcat_this_is_mah_job.png')

		with open(imagePath, "rb") as imageFile:
			imageBytes = imageFile.read()

		containerName = "LOL"
		memberName = "WAT.jpg"
		result = hashFile.hashFile(containerName, memberName, imageBytes)
		gotName, gotHex, gotPhash, gotX, gotY = result

		# The internal name must be handed back unchanged.
		self.assertEqual(memberName, gotName)

		self.assertEqual(gotHex, "1268e704908cc39299d73d6caafc23a0")
		self.assertEqual(gotPhash, -4992890192511777340)
		self.assertEqual(gotX, 493)
		self.assertEqual(gotY, 389)
	def test_hashImage1(self):
		# Hashes the 'dangerous-to-go-alone.jpg' fixture and checks every value
		# returned by hashFile.hashFile against recorded constants.
		testDir = os.path.realpath(__file__)
		testDir = os.path.dirname(testDir)
		imagePath = os.path.join(testDir, 'testimages', 'dangerous-to-go-alone.jpg')

		with open(imagePath, "rb") as imageFile:
			imageBytes = imageFile.read()

		containerName = "LOL"
		memberName = "WAT.jpg"
		result = hashFile.hashFile(containerName, memberName, imageBytes)
		gotName, gotHex, gotPhash, gotX, gotY = result

		# The internal name must be handed back unchanged.
		self.assertEqual(memberName, gotName)

		self.assertEqual(gotHex, "dcd6097eeac911efed3124374f44085b")
		self.assertEqual(gotPhash, -7813072021139921681)
		self.assertEqual(gotX, 325)
		self.assertEqual(gotY, 307)
	def test_hashImage6(self):
		# Hashes the 'lolcat-oregon-trail.jpg' fixture with shouldPhash=False.
		# The test expects the item hash to be filled in while pHash and both
		# image dimensions come back as None.
		testDir = os.path.realpath(__file__)
		testDir = os.path.dirname(testDir)
		imagePath = os.path.join(testDir, 'testimages', 'lolcat-oregon-trail.jpg')

		with open(imagePath, "rb") as imageFile:
			imageBytes = imageFile.read()

		containerName = "LOL"
		memberName = "WAT.jpg"
		result = hashFile.hashFile(containerName, memberName, imageBytes, shouldPhash=False)
		gotName, gotHex, gotPhash, gotX, gotY = result

		# The internal name must be handed back unchanged.
		self.assertEqual(memberName, gotName)

		self.assertEqual(gotHex, "7227289a017988b6bdcf61fd4761f6b9")
		self.assertEqual(gotPhash, None)
		self.assertEqual(gotX, None)
		self.assertEqual(gotY, None)
	def test_hashImage4(self):
		# Hashes the 'lolcat-oregon-trail.jpg' fixture and checks every value
		# returned by hashFile.hashFile against recorded constants.
		#
		# NOTE(review): this test unpacks SIX values (including a dHash) from
		# hashFile.hashFile, while the neighbouring tests unpack five. Confirm
		# which return shape the current hashFile actually has.
		testDir = os.path.realpath(__file__)
		testDir = os.path.dirname(testDir)
		imagePath = os.path.join(testDir, 'testimages', 'lolcat-oregon-trail.jpg')

		with open(imagePath, "rb") as imageFile:
			imageBytes = imageFile.read()

		containerName = "LOL"
		memberName = "WAT.jpg"
		result = hashFile.hashFile(containerName, memberName, imageBytes)
		gotName, gotHex, gotPhash, gotDhash, gotX, gotY = result

		# The internal name must be handed back unchanged.
		self.assertEqual(memberName, gotName)

		self.assertEqual(gotHex, "7227289a017988b6bdcf61fd4761f6b9")
		self.assertEqual(gotPhash, -4955310669995365332)
		self.assertEqual(gotDhash, -8660145558008088574)
		self.assertEqual(gotX, 501)
		self.assertEqual(gotY, 356)
Beispiel #9
0
    def test_hashImage5(self):
        # Hashes the 'lolcat-oregon-trail.jpg' fixture under the internal name
        # "WAT" (no file extension). The test expects the item hash to be
        # filled in while pHash and both image dimensions come back as None.
        # NOTE(review): presumably the missing extension is what suppresses
        # image hashing - confirm against hashFile.
        testDir = os.path.realpath(__file__)
        testDir = os.path.dirname(testDir)
        imagePath = os.path.join(testDir, 'testimages', 'lolcat-oregon-trail.jpg')

        with open(imagePath, "rb") as imageFile:
            imageBytes = imageFile.read()

        containerName = "LOL"
        memberName = "WAT"
        result = hashFile.hashFile(containerName, memberName, imageBytes)
        gotName, gotHex, gotPhash, gotX, gotY = result

        # The internal name must be handed back unchanged.
        self.assertEqual(memberName, gotName)

        self.assertEqual(gotHex, "7227289a017988b6bdcf61fd4761f6b9")
        self.assertEqual(gotPhash, None)
        self.assertEqual(gotX, None)
        self.assertEqual(gotY, None)
	def hashBareFile(self, wholePath, dbPath, doPhash=True):
		"""Hash a single file on disk and insert the result through ``self.dbApi``.

		The file at ``wholePath`` is read in binary mode, passed to
		``hasher.hashFile`` with an empty internal path, and the returned
		values are inserted with ``self.dbApi.insertIntoDb``. Finally
		"processed" is reported via ``self.putProgQueue``.

		NOTE(review): ``dbPath`` and ``doPhash`` are accepted but never read in
		this method - confirm whether ``doPhash`` was meant to be forwarded to
		the hasher.
		"""
		with open(wholePath, "rb") as handle:
			payload = handle.read()

		hashed = hasher.hashFile(wholePath, "", payload)
		# The original author notes the returned name is '' in this case.
		innerName, digest, phash, sizeX, sizeY = hashed

		self.dbApi.insertIntoDb(
			fsPath=wholePath,
			internalPath=innerName,
			itemHash=digest,
			pHash=phash,
			imgX=sizeX,
			imgY=sizeY,
		)

		self.putProgQueue("processed")
	def scanArchive(self, archPath, archData):
		"""Hash every member of an archive and insert one row per member.

		An ``uar.ArchiveReader`` is opened on ``archPath`` / ``archData``. After
		a duplicate-name check, ``self.dbApi.begin()`` is called and each member
		is read, hashed with ``hasher.hashFile`` and inserted with
		``self.dbApi.insertIntoDb``; "processed" is reported per member via
		``self.putProgQueue``. On success ``self.dbApi.commit()`` is called.

		Args:
			archPath: archive path, stored as ``fsPath`` on every inserted row.
			archData: passed to the reader as ``fileContents``.

		Raises:
			ValueError: if the archive lists the same member name more than once.
			Any exception raised while hashing or inserting is re-raised after
			``archPath`` has been printed and ``self.dbApi.rollback()`` called.
		"""
		# print("Scanning archive", archPath)

		archIterator = uar.ArchiveReader(archPath, fileContents=archData)

		try:
			# First pass: collect member names only, to detect duplicates.
			# NOTE(review): the reader is iterated a second time below; this
			# assumes ArchiveReader can be iterated more than once - confirm.
			fnames = [item[0] for item in archIterator]
			fset = set(fnames)
			if len(fnames) != len(fset):
				print(fnames)
				print(fset)
				raise ValueError("Wat?")

			self.dbApi.begin()

			try:
				for fName, fp in archIterator:

					fCont = fp.read()

					fName, hexHash, pHash, imX, imY = hasher.hashFile(archPath, fName, fCont)

					insertArgs = {
								"fsPath"       :archPath,
								"internalPath" :fName,
								"itemHash"     :hexHash,
								"pHash"        :pHash,
								"imgX"         :imX,
								"imgY"         :imY
							}

					self.dbApi.insertIntoDb(**insertArgs)
					self.putProgQueue("processed")
					# Stop early once the scanner's run flag is cleared; rows
					# inserted so far are still committed below.
					if not scanner.runState.run:
						break
			except BaseException:
				# Same reach as the former bare ``except:`` so the rollback
				# still happens on interrupts as well as ordinary errors.
				print(archPath)
				self.dbApi.rollback()
				raise

			self.dbApi.commit()
		finally:
			# Previously the reader was only closed after a successful commit;
			# close it on every path so failures do not leak it.
			archIterator.close()
	def processImageFile(self, wholePath, dbFilePath):
		"""Hash a bare image file unless the DB already has all its values.

		``self.dbApi.getItem`` is asked for the 'phash', 'imgx' and 'imgy'
		columns of ``dbFilePath``. If a row comes back and every value in it is
		truthy, "skipped" is put on ``self.outQ`` and nothing else happens.
		Otherwise the file at ``wholePath`` is read, hashed with
		``hasher.hashFile`` (empty internal path), upserted through
		``self.dbApi.upsert`` and "processed" is put on ``self.outQ``.

		``IndexError`` and ``UnboundLocalError`` raised during hashing or the
		upsert are logged to ``self.tlog`` and swallowed; other exceptions
		propagate.
		"""
		existing = self.dbApi.getItem(fspath=dbFilePath, wantCols = ['phash', 'imgx', 'imgy'])

		# print("Have hashes - ", dummy_itemHash, pHash, dHash)
		if existing and all(existing):
			self.outQ.put("skipped")
			return

		# The file stays open for the whole hash-and-upsert step, as before.
		with open(wholePath, "rb") as handle:
			payload = handle.read()
			try:
				hashed = hasher.hashFile(wholePath, "", payload)
				# The original author notes the returned name is '' in this case.
				innerName, digest, phash, sizeX, sizeY = hashed

				# insert or update data row
				self.dbApi.upsert(
					fsPath=wholePath,
					internalPath=innerName,
					itemHash=digest,
					pHash=phash,
					imgX=sizeX,
					imgY=sizeY,
				)

				self.outQ.put("processed")

			except (IndexError, UnboundLocalError):
				self.tlog.error("Error while processing fileN")
				self.tlog.error("%s", wholePath)
				self.tlog.error("%s", traceback.format_exc())

			# self.log.info("Scanned bare image %s, %s, %s", fileN, pHash, dHash)