def __init__(self):
    console.section('Picture Age and Gender Detection')
    console.task('Opening Webdriver')
    self.driver = cfg.getWebDriver()
    self.ages = []
    self.males = []
    self.females = []
def __init__(self):
    self.max_pages = cfg.google_img_pages()
    console.section('Google Reverse Image Search')
    console.task('Opening Webdriver')
    self.driver = cfg.getWebDriver()
    self.links = []
    self.predictions = []
def grabLinks(self):
    img_urls = []
    console.task('Opening Webdriver')
    driver = cfg.getWebDriver()
    for profile_url in self._pl:
        driver.get(profile_url)
        # first possible profile image location
        profile_img_links = driver.find_elements_by_xpath(
            "/html/body/div[1]/div[4]/div[1]/div/div[2]/div[2]/div[2]/div/div[1]/div[1]/div[3]/div/div[2]/div[3]/div/div/div/img"
        )
        for e in profile_img_links:
            img_src = e.get_attribute("src")
            img_urls.append(img_src)
        # second possible profile image location
        profile_img_links = driver.find_elements_by_xpath(
            "/html/body/div[1]/div[1]/div[3]/div[1]/div/div/div[1]/div/div/div[1]/div/div/div/a/div/img"
        )
        for e in profile_img_links:
            img_src = e.get_attribute("src")
            img_urls.append(img_src)
    driver.close()
    return list(set(img_urls))
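# Hedged usage sketch (not part of the original source), mirroring the skipFB
# branch of main() below. Note that grabLinks() de-duplicates via set(), so the
# returned image URLs are NOT guaranteed to stay index-aligned with the input
# profile URLs, even though main() assumes FBUrls[0] <=> img_urls[0].
def _example_fb_profile_grabber(fb_urls):
    f = FBProfileGrabber(fb_urls)  # constructor presumably stores fb_urls as self._pl
    return f.grabLinks()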
def collectAges(self, img_url):
    if not self.driver:
        self.driver = cfg.getWebDriver()
    console.task('New Image: {0}'.format(img_url.strip()[:90]))
    driver = self.driver
    driver.get("http://www.pictriev.com/?lang=en")
    console.subtask('Inserting Image URL')
    input_box = driver.find_elements_by_xpath('//*[@id="urltext"]')[0]
    input_box.clear()
    input_box.send_keys(img_url)
    btn = driver.find_elements_by_xpath('//*[@id="submit-url"]')[0]
    btn.click()
    console.subtask('Searching for Image...')
    time.sleep(cfg.timeout() * 3)
    try:
        age = driver.find_elements_by_css_selector(
            '#age-gauge > svg:nth-child(1) > text:nth-child(6) > tspan:nth-child(1)'
        )
    except Exception:
        # retry once on transient webdriver errors
        age = driver.find_elements_by_css_selector(
            '#age-gauge > svg:nth-child(1) > text:nth-child(6) > tspan:nth-child(1)'
        )
    if len(age) == 1:
        age = age[0].text
    else:
        age = 0
    self.ages.append(int(age))
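# Optional hardening, a sketch only (not in the original source): instead of the
# fixed time.sleep() above, Selenium's WebDriverWait can poll until the pictriev
# age gauge actually renders. Assumes the standard selenium support helpers.
def _wait_for_age_gauge(driver, timeout=30):
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    selector = ('#age-gauge > svg:nth-child(1) > '
                'text:nth-child(6) > tspan:nth-child(1)')
    # blocks for up to `timeout` seconds, returning the element once it exists
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, selector)))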
def __init__(self):
    console.section('Age Detection')
    console.task('Opening Webdriver')
    self.driver = None
    self.ages = []
    self.males = []
    self.females = []
def collectLinks(self, img_url):
    l_unreal = []
    console.task('New Image: {0}'.format(img_url.strip()[:90]))
    driver = self.driver
    driver.get("https://www.yandex.com/images/")
    console.subtask('Inserting Image URL')
    elems = driver.find_elements_by_xpath(
        '/html/body/div[1]/div/div[1]/div[2]/form/div[1]/span/span/table/tbody/tr/td[2]/div/button'
    )[0]
    elems.click()
    input_box = driver.find_elements_by_xpath(
        '/html/body/div[3]/div/div[1]/div/form[2]/span/span/input')[0]
    input_box.clear()
    input_box.send_keys(img_url)
    input_box.send_keys(Keys.RETURN)
    console.subtask('Searching for Image...')
    time.sleep(cfg.timeout())
    link_name = driver.find_elements_by_xpath(
        '/html/body/div[6]/div[1]/div[1]/div[3]/ul/li/div/a[2]')
    console.subtask("Collecting Links...")
    for link in link_name:
        href = link.get_attribute('href')
        l_unreal.append(href)
    console.subtask("Getting real links from Yandex ShortURLs")
    l_real = []
    for l_u in l_unreal:
        driver.get(l_u)
        if filterLink(driver.current_url):
            l_real.append(driver.current_url)
            console.subtask('Added verified {0}'.format(
                driver.current_url.strip()[:90]))
    self.links = l_real
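# Hedged usage sketch (not original code), following the currently disabled
# Yandex branch in main(): finish() is assumed to return the collected links.
def _example_yandex_grabber(profile_imgs):
    yandex = YandexGrabber()
    for img in profile_imgs:
        yandex.collectLinks(img)
    yandex.collectLinksLocal()
    return yandex.finish()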
def presentResult(predictions):
    if len(predictions) > 0:
        argmax = Counter(predictions)
        console.section("Result")
        if len(argmax.most_common(n=1)) > 0:
            (most_common_str, _) = argmax.most_common(n=1)[0]
        else:
            most_common_str = 'None'
        console.task("Google says it could be: {0}".format(most_common_str))
    else:
        console.failure("No predictions found")
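# Worked example of the majority vote above (a sketch, not original code):
# Counter.most_common(n=1) yields the single most frequent prediction.
def _example_present_result():
    presentResult(['john doe', 'john doe', 'jane roe'])
    # prints: "Google says it could be: john doe"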
def loadKnown(self, label):
    console.task('Loading known faces')
    pathlist = Path('./known').glob('**/*.jpg')
    for path in pathlist:
        p_str = str(path)
        console.subtask('Loading {0}'.format(p_str.split('/')[1]))
        im = face_recognition.load_image_file(p_str)
        encoding = face_recognition.face_encodings(
            im, num_jitters=self.num_jitters)
        for e in encoding:
            self.known_face_encodings.append(e)
            self.known_face_names.append(label)
def collectLinks(self, img_url): console.task('New Image: {0}'.format(img_url.strip()[:90])) driver = self.driver driver.get("https://www.google.com/imghp") console.subtask('Inserting Image URL') console.task("Please agree to google's stuff in the browser") time.sleep(10) elems = driver.find_elements_by_xpath(self.PHOTO_XPATH)[0] elems.click() time.sleep(1) input = driver.find_elements_by_xpath('//*[@id="Ycyxxc"]')[0] input.clear() input.send_keys(img_url) input.send_keys(Keys.RETURN) console.subtask('Searching for Image...') time.sleep(cfg.timeout() * 2) pred_error = False try: pred = driver.find_element_by_xpath(self.PRED_XPATH) except NoSuchElementException: console.subfailure('No Prediction given sry...') pred = None pred_error = True except BrokenPipeError: #just try again... try: pred = driver.find_element_by_xpath(self.PRED_XPATH) except NoSuchElementException: console.subfailure( 'Broken pipe Error. This is not a Problem...moving on!') console.subfailure('No Prediction given sry...') pred = None pred_error = True if not pred_error: pred = pred.text self.predictions.append(pred) console.subtask("Collecting Links...(Page 1)") self.getLinks() for num in range(2, self.max_pages + 1): console.subtask("Switching to Page {0}".format(num)) try: page_n = driver.find_element_by_link_text(str(num)) page_n.click() time.sleep(cfg.timeout()) console.subtask("Collecting Links...(Page {0})".format(num)) self.getLinks() except NoSuchElementException: console.subfailure('No more pages...') break
def collectLinks(self, img_url): console.task('New Image: {0}'.format(img_url.strip()[:90])) driver = self.driver driver.get("https://www.google.com/imghp") console.subtask('Inserting Image URL') elems = driver.find_elements_by_xpath('//*[@id="qbi"]')[0] elems.click() time.sleep(1) input = driver.find_elements_by_xpath('//*[@id="qbui"]')[0] input.clear() input.send_keys(img_url) input.send_keys(Keys.RETURN) console.subtask('Searching for Image...') time.sleep(cfg.timeout()) try: pred = driver.find_element_by_xpath( "/html/body/div[5]/div[3]/div[3]/div[1]/div[2]/div/div[2]/div[1]/div/div[2]/a" ) pred = pred.text except NoSuchElementException: console.subfailure('No Prediction given sry...') pred = None self.predictions.append(pred) link_name = driver.find_elements_by_xpath(".//h3[@class='r']/a") console.subtask("Collecting Links...(Page 1)") for link in link_name: href = link.get_attribute('href') if filterLink(href): console.subtask('Added {0}'.format(href)) self.links.append(href) for num in range(2, self.max_pages + 1): console.subtask("Switching to Page {0}".format(num)) try: page_n = driver.find_element_by_link_text(str(num)) page_n.click() time.sleep(cfg.timeout()) console.subtask("Collecting Links...(Page {0})".format(num)) link_name = driver.find_elements_by_xpath( ".//h3[@class='r']/a") for link in link_name: href = link.get_attribute('href') if filterLink(href): console.subtask('Added {0}'.format(href)) self.links.append(href) except NoSuchElementException: console.subfailure('No more pages...') break
def downloadCSV(self):
    console.task('Waiting for page to finish')
    while "Loading" in self.driver.page_source:
        sys.stdout.write(".")
        sys.stdout.flush()
        time.sleep(1)
    print('')
    console.task('Downloading CSV')
    time.sleep(2)
    try:
        dl = self.driver.find_elements_by_xpath('//*[@id="dltop"]')[0]
        dl.click()
    except Exception:
        # no download button means ImageRaider produced no results
        console.failure('No Results...')
    self.driver.close()
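# Hedged usage sketch (not original code), mirroring the ImageRaider flow in the
# older main() below; insertImageLinks() and processCSV() are referenced there
# but not shown in this excerpt.
def _example_image_raider(raider_img_list):
    raider = ImageRaiderGrabber()
    raider.insertImageLinks(raider_img_list)
    raider.downloadCSV()
    return raider.processCSV()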
def collectLinksLocal(self):
    l_unreal = []
    console.task('Uploading Local Images')
    driver = self.driver
    driver.get("https://www.yandex.com/images/")
    pa_g = Path('./known')
    pathlist = []
    for ext in [
            '.jpg', '.JPG', '.png', '.PNG', '.jpeg', '.JPEG', '.bmp', '.BMP'
    ]:
        tmp_pl = pa_g.glob('**/*{}'.format(ext))
        for t in tmp_pl:
            pathlist.append(t)
    for p in pathlist:
        str_p = str(p)
        console.subtask('Inserting Image URL')
        time.sleep(5)
        elems = driver.find_elements_by_xpath(
            '/html/body/div[1]/div[2]/div[1]/div[1]/div[1]/div/div/div/div[1]/div[2]/form/div[1]/span/span[2]'
        )[0]
        elems.click()
        input_box = driver.find_elements_by_xpath(
            '/html/body/div[1]/div[2]/div[1]/div[1]/div[1]/div/div/div/div[4]/div/div[1]/div/form[1]/input'
        )[0]
        input_box.clear()
        p_i = os.path.join(os.getcwd(), str_p)
        input_box.send_keys(p_i)
        console.subtask('Searching for Image...')
        time.sleep(cfg.timeout())
        link_name = driver.find_elements_by_xpath(
            '/html/body/div[6]/div[1]/div[1]/div[3]/ul/li/div/a[2]')
        console.subtask("Collecting Links...")
        for link in link_name:
            href = link.get_attribute('href')
            l_unreal.append(href)
    console.subtask("Getting real links from Yandex ShortURLs")
    l_real = []
    for l_u in l_unreal:
        driver.get(l_u)
        if filterLink(driver.current_url):
            l_real.append(driver.current_url)
            console.subtask('Added verified {0}'.format(
                driver.current_url.strip()[:90]))
    for l in l_real:
        self.links.append(l)
def grabData(self): console.task('Opening Webdriver') driver = cfg.getWebDriver() driver.get(self.url) self.profile_list = [] self.profile_img = [] console.task("Please agree to facebook's stuff in the browser") time.sleep(10) #get all profile image links profile_img_links = driver.find_elements_by_xpath( "//a[@class='_2ial']") console.subtask('Collecting Image URLs...(Page 1)') if len(profile_img_links) <= 0: console.subfailure('No FB Links found') else: for e in profile_img_links: href = e.get_attribute("href") image = e.find_element_by_tag_name("img") img_src = image.get_attribute("src") self.profile_list.append(href) self.profile_img.append(img_src) pages = driver.find_elements_by_xpath("//a") pages_links = [] for e in pages: link = e.get_attribute('href') if "&page" in link: pages_links.append(link) pages_links = list(set(pages_links)) for page in pages_links: driver.get(page) profile_img_links = driver.find_elements_by_xpath( "//a[@class='_2ial']") page_num = page[-1:] console.subtask( 'Collecting Images URLs...(Page {0})'.format(page_num)) for e in profile_img_links: href = e.get_attribute("href") image = e.find_element_by_tag_name("img") img_src = image.get_attribute("src") self.profile_list.append(href) self.profile_img.append(img_src) time.sleep(1) driver.close()
def loadKnown(self, label):
    console.task('Loading known faces')
    pa_g = Path('./known')
    pathlist = []
    for ext in ['.jpg', '.JPG', '.png', '.PNG', '.jpeg', '.JPEG', '.bmp', '.BMP']:
        tmp_pl = pa_g.glob('**/*{}'.format(ext))
        for t in tmp_pl:
            pathlist.append(t)
    for path in pathlist:
        p_str = str(path)
        delim = '/'
        if platform == "win32":
            delim = '\\'
        console.subtask('Loading {0}'.format(p_str.split(delim)[1]))
        im = face_recognition.load_image_file(p_str)
        encoding = face_recognition.face_encodings(im, num_jitters=self.num_jitters)
        for e in encoding:
            self.known_face_encodings.append(e)
            self.known_face_names.append(label)
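# Hedged usage sketch based on main() (not original code): FaceRecog is built
# from parallel link/image lists, then loadKnown() encodes every reference face
# found under ./known before the profile images are matched against them.
def _example_face_recog(profile_links, profile_imgs, name, num_jitters=70):
    r = FaceRecog(profile_links, profile_imgs, num_jitters=num_jitters)
    r.loadKnown(name)
    return r.getValidLinksAndImg(name)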
def grabData(self):
    console.task('Opening Webdriver')
    driver = cfg.getWebDriver()
    driver.get(self.url)
    self.profile_list = []
    self.profile_img = []
    # get all profile image links
    profile_img_links = driver.find_elements_by_xpath(
        "//a[@class='_2ial _8o _8s lfloat _ohe']")
    console.subtask('Collecting Image URLs...(Page 1)')
    for e in profile_img_links:
        href = e.get_attribute("href")
        image = e.find_element_by_tag_name("img")
        img_src = image.get_attribute("src")
        self.profile_list.append(href)
        self.profile_img.append(img_src)
    pages = driver.find_elements_by_xpath("//a")
    pages_links = []
    for e in pages:
        link = e.get_attribute('href')
        if link and "&page" in link:  # get_attribute may return None
            pages_links.append(link)
    pages_links = list(set(pages_links))
    for page in pages_links:
        driver.get(page)
        profile_img_links = driver.find_elements_by_xpath(
            "//a[@class='_2ial _8o _8s lfloat _ohe']")
        page_num = page[-1:]
        console.subtask(
            'Collecting Image URLs...(Page {0})'.format(page_num))
        for e in profile_img_links:
            href = e.get_attribute("href")
            image = e.find_element_by_tag_name("img")
            img_src = image.get_attribute("src")
            self.profile_list.append(href)
            self.profile_img.append(img_src)
        time.sleep(1)
    driver.close()
def constructIndexes(self, label):
    valid_links = []
    console.section('Analyzing')
    file_name = ''.join(random.choice(string.ascii_uppercase + string.digits)
                        for _ in range(6))
    file_name += '.jpg'
    tmp_path = os.path.join(tempfile.gettempdir(), file_name)
    console.task("Storing Image in {0}".format(tmp_path))
    for num, i in enumerate(self.profile_img):
        console.task('Analyzing {0}...'.format(i.strip()[:90]))
        urlretrieve(i, tmp_path)
        frame = cv2.imread(tmp_path)
        # upscale to help the detector, then convert BGR (OpenCV) to RGB
        big_frame = cv2.resize(frame, (0, 0), fx=2.0, fy=2.0)
        rgb_frame = big_frame[:, :, ::-1]
        face_locations = face_recognition.face_locations(rgb_frame)
        face_encodings = face_recognition.face_encodings(
            rgb_frame, face_locations, num_jitters=self.num_jitters)
        face_names = []
        for face_encoding in face_encodings:
            # See if the face is a match for the known face(s)
            matches = face_recognition.compare_faces(self.known_face_encodings,
                                                     face_encoding)
            name = "Unknown"
            # If a match was found in known_face_encodings, just use the first one.
            if True in matches:
                first_match_index = matches.index(True)
                name = self.known_face_names[first_match_index]
            face_names.append(name)
        for _, name in zip(face_locations, face_names):
            if name == label:
                valid_links.append(num)
    if os.path.isfile(tmp_path):
        console.task("Removing {0}".format(tmp_path))
        os.remove(tmp_path)
    return valid_links
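# A plausible shape for getValidLinksAndImg(), which both main() variants call
# but which is not shown in this excerpt (an assumption, not the original
# implementation): it maps the indexes from constructIndexes() back onto the
# stored profile data.
def _getValidLinksAndImg_sketch(self, label):
    valid = self.constructIndexes(label)
    links = [self.profile_list[i] for i in valid]  # attribute names assumed
    imgs = [self.profile_img[i] for i in valid]
    return links, imgs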
def presentResult(predictions):
    argmax = Counter(predictions)
    console.section("Result")
    if len(argmax.most_common(n=1)) > 0:
        (most_common_str, _) = argmax.most_common(n=1)[0]
    else:
        # guard against an empty prediction list
        most_common_str = 'None'
    console.task("Google says it could be: {0}".format(most_common_str))
def __init__(self):
    console.section('ImageRaider Reverse Image Search')
    console.task('Opening Webdriver')
    self.driver = cfg.getWebDriver()
    self.csv_error = False
def collectLinksLocal(self):
    driver = self.driver
    console.section("Uploading Local Known Images")
    pa_g = Path('./known')
    pathlist = []
    for ext in [
            '.jpg', '.JPG', '.png', '.PNG', '.jpeg', '.JPEG', '.bmp', '.BMP'
    ]:
        tmp_pl = pa_g.glob('**/*{}'.format(ext))
        for t in tmp_pl:
            pathlist.append(t)
    for p in pathlist:
        str_p = str(p)
        driver.get("https://www.google.com/imghp")
        console.task("Please agree to google's stuff in the browser")
        time.sleep(10)
        elems = driver.find_elements_by_xpath(self.PHOTO_XPATH)[0]
        elems.click()
        time.sleep(1)
        elems = driver.find_element_by_xpath(self.PHOTO_UPLOAD_XPATH)
        elems.click()
        time.sleep(1)
        console.subtask("Inserting Path")
        input_box = driver.find_element_by_xpath('//*[@id="awyMjb"]')
        p_i = os.path.join(os.getcwd(), str_p)
        input_box.send_keys(p_i)
        time.sleep(cfg.timeout() * 2)
        pred_error = False
        try:
            pred = driver.find_element_by_xpath(self.PRED_XPATH)
        except NoSuchElementException:
            console.subfailure('No Prediction given sry...')
            pred = None
            pred_error = True
        except BrokenPipeError:
            # just try again...
            try:
                pred = driver.find_element_by_xpath(self.PRED_XPATH)
            except NoSuchElementException:
                console.subfailure(
                    'Broken pipe Error. This is not a Problem...moving on!')
                console.subfailure('No Prediction given sry...')
                pred = None
                pred_error = True
        if not pred_error:
            pred = pred.text
            self.predictions.append(pred)
        console.subtask("Collecting Links...(Page 1)")
        self.getLinks()
        for num in range(2, self.max_pages + 1):
            console.subtask("Switching to Page {0}".format(num))
            try:
                page_n = driver.find_element_by_link_text(str(num))
                page_n.click()
                time.sleep(cfg.timeout())
                console.subtask(
                    "Collecting Links...(Page {0})".format(num))
                self.getLinks()
            except NoSuchElementException:
                console.subfailure('No more pages...')
                break
def main(skipFB=False, skipY=False, FBUrls=[], jsonRep=None):
    if not skipFB:
        # collect user input
        console.prompt("Enter the person's name to find on FB: ")
        name = input('')
        while not name:
            console.prompt("Enter the person's name to find on FB: ")
            name = input('')
    else:
        console.task('Skipping FB Search')
        name = "Unknown"
    console.prompt(
        'How many jitters, higher is better [max 100] (default=70): ')
    num_jitters = input('')
    if not num_jitters:
        console.task('Setting jitters to 70')
        num_jitters = 70
    num_jitters = int(num_jitters)
    if num_jitters > 100:
        console.subfailure('Dude wtf?!')
        num_jitters = 100
        console.subfailure('Using 100 jitters...')
    if not skipFB:
        # grab profile urls
        f = FBGrabber(name)
        f.grabData()
        # do face recognition on those profile images
        r = FaceRecog(f.getProfileLinks(), f.getProfileImages(),
                      num_jitters=num_jitters)
        r.loadKnown(name)
        profile_links, profile_imgs = r.getValidLinksAndImg(name)
        console.section('Result')
        console.task('Found the following Profiles:')
        for i in range(len(profile_links)):
            console.subtask(profile_links[i])
    else:
        if len(FBUrls) > 0:
            f = FBProfileGrabber(FBUrls)
            img_urls = f.grabLinks()
            # FBUrls are our profile links, in sync with img_urls,
            # so FBUrls[0] <=> img_urls[0]
            r = FaceRecog(FBUrls, img_urls, num_jitters=num_jitters)
            r.loadKnown(name)
            profile_links, profile_imgs = r.getValidLinksAndImg(name)
            console.section('Result')
            console.task('Found the following Profiles:')
            for i in range(len(profile_links)):
                console.subtask(profile_links[i])
        else:
            profile_links = []
            profile_imgs = []
    # google reverse image search on profile pics
    g = GoogleGrabber()
    for img in profile_imgs:
        g.collectLinks(img)
    # google reverse image search on reference pic
    g.collectLinksLocal()
    rev_links, predictions = g.finish()
    # TODO: Fix yandex search
    # if not skipY:
    if False:
        yandex = YandexGrabber()
        for img in profile_imgs:
            yandex.collectLinks(img)
        yandex.collectLinksLocal()
        # add to rev_links
        for e in yandex.finish():
            rev_links.append(e)
    else:
        console.task('Skipping Yandex Search')
    rev_links = list(set(rev_links))
    instaNames = parseInstaUsername(filterInstaLinks(rev_links))
    validatedInstaNames = []
    console.section("Validating Instagram Profiles")
    for un in instaNames:
        console.task("Validating Profile: '{0}'".format(un))
        if validateInstaUser(un, num_jitters):
            validatedInstaNames.append(un)
    raider_img_list = profile_imgs
    for v in validatedInstaNames:
        l = getInstaLinks(v)
        for li in l:
            raider_img_list.append(li)
    if len(raider_img_list) <= 0:
        console.failure('No Links found...')
    else:
        console.task('RIP Imageraider')
    rev_links = list(set(rev_links))
    predictions = list(set(predictions))
    console.section('Links')
    print(rev_links)
    console.section('Predictions')
    try:
        predictions = [x.lower() for x in predictions]
    except AttributeError:
        # predictions may contain None entries
        predictions = []
    print(predictions)
    presentResult(predictions)
    for pl in profile_links:
        rev_links.append(pl)
    rev_links = list(set(rev_links))
    # estimate age
    ageEstimator = PictrievGrabber()
    if len(validatedInstaNames) > 0:
        for v in validatedInstaNames:
            l = getInstaLinks(v)
            if len(l) >= cfg.instaLimit():
                l = l[:cfg.instaLimit()]
            for li in l:
                ageEstimator.collectAges(li)
        age = ageEstimator.finish()
    else:
        console.failure('No Instagram Images to upload...')
        # ageEstimator.finish()
        age = "Unknown"
    if jsonRep:
        console.section("Dumping JSON Report")
        makeJSONReport(name, rev_links, predictions, validatedInstaNames,
                       age, jsonRep)
    else:
        console.section("Creating PDF Report")
        makeReport(name, rev_links, predictions, validatedInstaNames, age)
    p = os.path.join(tempfile.gettempdir(), 'imageraider')
    if os.path.isdir(p):
        pathlist = Path(p).glob('**/*')
        for path in pathlist:
            s_p = str(path)
            os.remove(s_p)
    console.task("KTHXBYE")
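# Minimal entry-point sketch (an assumption; the original CLI argument parsing
# is not part of this excerpt):
if __name__ == '__main__':
    main(skipFB=False, skipY=True, FBUrls=[], jsonRep=None)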
def collectLinks(self, img_url): console.task('New Image: {0}'.format(img_url.strip()[:90])) driver = self.driver driver.get("https://www.google.com/imghp") console.subtask('Inserting Image URL') elems = driver.find_elements_by_xpath(self.PHOTO_XPATH)[0] elems.click() time.sleep(1) input = driver.find_elements_by_xpath('//*[@id="qbui"]')[0] input.clear() input.send_keys(img_url) input.send_keys(Keys.RETURN) console.subtask('Searching for Image...') time.sleep(cfg.timeout() * 2) pred_error = False try: pred = driver.find_element_by_xpath(self.PRED_XPATH) except NoSuchElementException: console.subfailure('No Prediction given sry...') pred = None pred_error = True except BrokenPipeError: #just try again... try: pred = driver.find_element_by_xpath(self.PRED_XPATH) except NoSuchElementException: console.subfailure( 'Broken pipe Error. This is not a Problem...moving on!') console.subfailure('No Prediction given sry...') pred = None pred_error = True if not pred_error: pred = pred.text self.predictions.append(pred) try: link_name = driver.find_elements_by_xpath("//*[@class='iUh30']") #link_name=driver.find_elements_by_xpath(".//h3[@class='r']/a") except BrokenPipeError: link_name = driver.find_elements_by_xpath("//*[@class='iUh30']") #link_name=driver.find_elements_by_xpath(".//h3[@class='r']/a") console.subtask("Collecting Links...(Page 1)") if len(link_name) <= 0: console.subfailure('No Links found') else: for link in link_name: #href = link.get_attribute('href') if link != None: href = link.text if filterLink(href): console.subtask('Added {0}'.format(href)) self.links.append(href) for num in range(2, self.max_pages + 1): console.subtask("Switching to Page {0}".format(num)) try: page_n = driver.find_element_by_link_text(str(num)) page_n.click() time.sleep(cfg.timeout()) console.subtask("Collecting Links...(Page {0})".format(num)) try: link_name = driver.find_elements_by_xpath( "//*[@class='iUh30']") except BrokenPipeError: link_name = driver.find_elements_by_xpath( "//*[@class='iUh30']") for link in link_name: href = link.text if filterLink(href): console.subtask('Added {0}'.format(href)) self.links.append(href) except NoSuchElementException: console.subfailure('No more pages...') break
def main():
    # collect user input
    console.prompt("Enter the person's name to find on FB: ")
    name = input('')
    console.prompt('How many jitters, higher is better [max 100]: ')
    num_jitters = input('')
    num_jitters = int(num_jitters)
    if num_jitters > 100:
        console.subfailure('Dude wtf?!')
        num_jitters = 100
        console.subfailure('Using 100 jitters...')
    # grab profile urls
    f = FBGrabber(name)
    f.grabData()
    # do face recognition on those profile images
    r = FaceRecog(f.getProfileLinks(), f.getProfileImages(),
                  num_jitters=num_jitters)
    r.loadKnown(name)
    profile_links, profile_imgs = r.getValidLinksAndImg(name)
    console.section('Result')
    console.task('Found the following Profiles:')
    for i in range(len(profile_links)):
        console.subtask(profile_links[i])
    # google reverse image search on profile pics
    g = GoogleGrabber()
    for img in profile_imgs:
        g.collectLinks(img)
    # google reverse image search on reference pic
    g.collectLinksLocal()
    rev_links, predictions = g.finish()
    yandex = YandexGrabber()
    for img in profile_imgs:
        yandex.collectLinks(img)
    # add to rev_links
    for e in yandex.finish():
        rev_links.append(e)
    rev_links = list(set(rev_links))
    instaNames = parseInstaUsername(filterInstaLinks(rev_links))
    validatedInstaNames = []
    console.section("Validating Instagram Profiles")
    for un in instaNames:
        console.task("Validating Profile: '{0}'".format(un))
        if validateInstaUser(un, num_jitters):
            validatedInstaNames.append(un)
    raider_img_list = profile_imgs
    for v in validatedInstaNames:
        l = getInstaLinks(v)
        for li in l:
            raider_img_list.append(li)
    if len(raider_img_list) <= 0:
        console.failure('No Links found...')
    else:
        raider = ImageRaiderGrabber()
        raider.insertImageLinks(raider_img_list)
        raider.downloadCSV()
        raider_links = raider.processCSV()
        for raider_link in raider_links:
            rev_links.append(raider_link)
    rev_links = list(set(rev_links))
    predictions = list(set(predictions))
    console.section('Links')
    print(rev_links)
    console.section('Predictions')
    try:
        predictions = [x.lower() for x in predictions]
    except AttributeError:
        # predictions may contain None entries
        predictions = []
    print(predictions)
    presentResult(predictions)
    console.section("Creating PDF Report")
    makeReport(name, rev_links, predictions, validatedInstaNames)
    p = os.path.join(tempfile.gettempdir(), 'imageraider')
    if os.path.isdir(p):
        pathlist = Path(p).glob('**/*')
        for path in pathlist:
            s_p = str(path)
            os.remove(s_p)
    console.task("KTHXBYE")
def __init__(self):
    console.section('Yandex Reverse Image Search')
    console.task('Opening Webdriver')
    self.driver = cfg.getWebDriver()
    self.links = []