def read_file(file_path, read_lines=False, mode='r', encoding='utf8'):
    """Read text from file"""
    # Files living inside a zip archive are pulled out via zipimport
    # and always returned as raw bytes.
    if '.zip' in file_path.lower():
        importer = zipimporter(get_zip_path(file_path))
        return BytesIO(importer.get_data(file_path)).read()
    # Binary modes must not receive an encoding argument.
    if 'b' in mode:
        encoding = None
    with open(file_path, mode, encoding=encoding) as handle:
        return handle.readlines() if read_lines else handle.read()
def run(self):
    """Group the domains from self.latest() by TLD suffix and write one
    gzipped text file per suffix into self.save_path.

    Returns:
        dict: mapping of TLD suffix -> list of domains saved under it
        (empty when nothing could be grouped).
    """
    latest_list = self.latest()
    tld_dict = {}
    for domain in latest_list:
        try:
            t = tldextract.tldextract.extract(domain)
            # setdefault replaces the manual "if key not in dict" dance.
            tld_dict.setdefault(t.suffix, []).append(domain)
        except Exception as e:
            # Best-effort: report the bad domain and keep going so one
            # malformed entry cannot abort the whole run.
            print(e)
    ret = {}
    if tld_dict:
        Path(self.save_path).mkdir(parents=True, exist_ok=True)
        for tld, domains in tld_dict.items():
            ret[tld] = domains
            filename = "{}.txt.gz".format(tld)
            # Write the encoded payload straight into the gzip stream;
            # the former BytesIO round-trip copied the bytes for no gain.
            with gzip.open('{0}{1}'.format(self.save_path, filename), 'wb') as f:
                f.write('\n'.join(domains).encode('utf-8'))
    return ret
def send_fcgi_response(request, data, response):
    """Parse the incoming FCGI record from *data*, then answer on
    *request* with an FCGI_STDOUT record carrying *response*, followed
    by an empty FCGI_END_REQUEST record."""
    record = Record()
    record.read(BytesIO(data))

    def _emit(rec):
        # Serialize the record and push the raw bytes to the client.
        buf = BytesIO()
        rec.write(buf)
        request.sendall(buf.getvalue())

    # First record: the response body on STDOUT.
    record.type = FCGI_STDOUT
    record.contentData = response
    record.contentLength = len(response)
    _emit(record)

    # Second record: an empty END_REQUEST closing the exchange.
    record.type = FCGI_END_REQUEST
    record.contentData = ""
    record.contentLength = 0
    _emit(record)
def _download_single_zone_file(self, url): response = self._get(url) status_code = response.status_code if status_code == 200: zone_name = url.rsplit('/', 1)[-1].rsplit('.')[-2] compressed_file = BytesIO(response.content) _, option = cgi.parse_header( response.headers['content-disposition']) filename = option['filename'] if not filename: filename = zone_name + '.txt.gz' path_filename = "{}{}".format(self.save_path, filename) decompressed_file = gzip.GzipFile(fileobj=compressed_file, mode='rb') text_list = [] for line in decompressed_file.readlines(): domain = line.decode('utf-8').split('\t')[0].rstrip('.') text_list.append(domain) text_string_list = '\n'.join(list(set(text_list))) text_string_bytes_object = BytesIO() text_string_bytes_object.write(text_string_list.encode('utf-8')) text_string_bytes_object.seek(0) text_string_buf = text_string_bytes_object.read() gzip_object = gzip.compress(text_string_buf) gzip_size = gzip_object.__sizeof__() MAX_FILE_SIZE = 1024 * 1024 * 35 if gzip_size >= MAX_FILE_SIZE: chapters = 0 source_buf = gzip_object n = MAX_FILE_SIZE final = [ source_buf[i * n:(i + 1) * n] for i in range((len(source_buf) + n - 1) // n) ] # list comprehension chunker for chunk in final: chapters += 1 chapter_string = "{}".format(chapters) chapter_string = chapter_string.zfill(2) chapter_filename = "{}_{}{}".format( zone_name, chapter_string, '.txt.gz') chapter_path_filename = "{}{}".format( self.save_path, chapter_filename) with open(chapter_path_filename, 'wb+') as f: f.write(chunk) else: with open(path_filename, 'wb+') as f: f.write(gzip_object) elif status_code == 401: self.token = self.authenticate() elif status_code == 404: pass else: pass