def run(self):
    """Group the latest domain list by TLD suffix and persist each group
    as a gzip-compressed, newline-delimited text file under save_path.

    Returns:
        dict: mapping of TLD suffix -> list of domains written (empty
        when no domain could be parsed).
    """
    latest_list = self.latest()
    tld_dict = {}
    for domain in latest_list:
        try:
            # Only the parse can realistically fail; keep the try body minimal.
            t = tldextract.tldextract.extract(domain)
        except Exception as e:
            # Best-effort: report and skip domains tldextract cannot parse.
            print(e)
            continue
        tld_dict.setdefault(t.suffix, []).append(domain)
    ret = {}
    if tld_dict:
        Path(self.save_path).mkdir(parents=True, exist_ok=True)
        for tld, domains in tld_dict.items():
            ret[tld] = domains
            filename = "{}.txt.gz".format(tld)
            # Write the encoded payload straight into the gzip stream;
            # the intermediate BytesIO write/seek/read roundtrip was redundant.
            with gzip.open('{0}{1}'.format(self.save_path, filename), 'wb') as f:
                f.write('\n'.join(domains).encode('utf-8'))
    return ret
def process_image(self, image_bytes):
    """Probe *image_bytes* as a decodable image and, on success, save the
    raw bytes as a timestamped JPEG in screenshot_dir.

    Args:
        image_bytes (bytes): raw image payload (assumed JPEG — the file
            is written with a .jpeg extension without re-encoding; TODO
            confirm callers only pass JPEG data).

    Returns:
        bool: True when the file was written, False when PIL cannot
        identify the bytes as an image.
    """
    # Seed the buffer via the constructor instead of write() + seek(0).
    buf = BytesIO(image_bytes)
    try:
        image = Image.open(buf)
    except IOError as e:
        self.logger.error(e)
        return False
    else:
        fn = 'screenshot_{}.jpeg'.format(
            str(datetime.datetime.now()).replace(' ', '_'))
        fp = os.path.join(self.screenshot_dir, fn)
        with open(fp, 'wb') as f:
            f.write(image_bytes)
        self.logger.info('Image written successfully: {}'.format(fp))
        return True
    finally:
        # Close explicitly rather than relying on `del` + refcounting GC.
        buf.close()
def _download_single_zone_file(self, url):
    """Download one gzipped zone file, de-duplicate its domains, and store
    the result gzip-compressed under save_path, split into ~35 MB
    "chapter" files when the compressed output exceeds that limit.

    Args:
        url (str): direct URL of the gzipped zone file
            (e.g. ".../com.txt.gz").

    Side effects:
        Writes file(s) under self.save_path; on HTTP 401 refreshes
        self.token; 404 and other statuses are silently ignored.
    """
    response = self._get(url)
    status_code = response.status_code
    if status_code == 200:
        # ".../com.txt.gz" -> "com" (second-to-last dot-separated piece).
        zone_name = url.rsplit('/', 1)[-1].rsplit('.')[-2]
        compressed_file = BytesIO(response.content)
        # NOTE(review): cgi is deprecated since 3.11 and removed in 3.13;
        # migrate to email.message header parsing when upgrading Python.
        _, option = cgi.parse_header(
            response.headers['content-disposition'])
        # .get() so a header without a filename parameter falls through to
        # the derived name instead of raising KeyError.
        filename = option.get('filename')
        if not filename:
            filename = zone_name + '.txt.gz'
        path_filename = "{}{}".format(self.save_path, filename)
        decompressed_file = gzip.GzipFile(fileobj=compressed_file, mode='rb')
        # First tab-separated field of each line is the domain; strip the
        # trailing root dot of the zone-file notation.
        text_list = []
        for line in decompressed_file.readlines():
            domain = line.decode('utf-8').split('\t')[0].rstrip('.')
            text_list.append(domain)
        text_string_list = '\n'.join(set(text_list))
        gzip_object = gzip.compress(text_string_list.encode('utf-8'))
        # BUG FIX: __sizeof__() reports the Python object's memory
        # footprint (payload + object header), not the payload length;
        # len() gives the actual number of compressed bytes.
        gzip_size = len(gzip_object)
        MAX_FILE_SIZE = 1024 * 1024 * 35
        if gzip_size >= MAX_FILE_SIZE:
            # NOTE(review): each chapter is a raw byte slice of ONE gzip
            # stream — chapters are not individually decompressible and
            # must be concatenated in order first; confirm the consumer
            # reassembles them.
            n = MAX_FILE_SIZE
            chunks = [gzip_object[i * n:(i + 1) * n]
                      for i in range((gzip_size + n - 1) // n)]
            for chapter, chunk in enumerate(chunks, start=1):
                chapter_filename = "{}_{}{}".format(
                    zone_name, str(chapter).zfill(2), '.txt.gz')
                chapter_path_filename = "{}{}".format(
                    self.save_path, chapter_filename)
                with open(chapter_path_filename, 'wb+') as f:
                    f.write(chunk)
        else:
            with open(path_filename, 'wb+') as f:
                f.write(gzip_object)
    elif status_code == 401:
        # Credentials expired: refresh the token; this request itself is
        # not retried here — presumably the caller retries. TODO confirm.
        self.token = self.authenticate()
    elif status_code == 404:
        pass
    else:
        pass