def get_content_html(self):
    """Fetch the target URL's HTML and, when the connection test flag is
    set, hand the body to the bs4 parser to collect <script> tags and
    mine their JS content.

    Side effects only (network request + console output); returns None.
    """
    try:
        # Bug fix: the User-Agent must be sent via `headers=`; the original
        # passed it as `data=`, which ships it in the request body instead
        # of as an HTTP header.
        # NOTE(review): verify=False disables TLS certificate validation —
        # kept from the original, but unsafe on untrusted networks.
        get_content = requests.get(
            self.url,
            headers={'User-Agent': self.user_agent},
            verify=False,
        )
        if self.test_connection:
            soup_objt = bs4_module.CoreParser(get_content.text, self.url)
            soup_objt.find_all_script()
            soup_objt.get_content_js()
    except requests.exceptions.ConnectionError as e:
        # Bug fix: requests' ConnectionError does not subclass the builtin
        # ConnectionError, so the original `except ConnectionError` never
        # caught failed requests.
        print(f"Connection error while try to parse the URL content \n{e}")
def get_content_js(self):
    """Download every JS URL in ``self.urls``, scan each response against
    the project regex patterns, print classified matches (AWS / domain /
    target-name / other), and save each script's bytes under a directory
    named after the target host.

    Side effects only (network, mkdir, file writes, console); returns None.
    """
    test_conn = requests_module.CoreRequests(self.url_domain, self.name_target)
    if not test_conn:
        # Bug fix: the original only printed and then fell through to
        # `os.mkdir(str(dir_name))` with `dir_name` undefined (NameError).
        # Bail out early when the host looks unreachable.
        print(f"{self.url_domain} maybe down :/ ?")
        return
    current_dir = os.getcwd()
    # Directory is named after the hostname of the target domain.
    dir_name = urlparse(self.url_domain).hostname
    try:
        os.mkdir(str(dir_name))
        print(f">> Create directory {dir_name}")
        print(f">> File will be saved at {dir_name}")
    except FileExistsError as e:
        # Directory already exists from a previous run — reuse it.
        print(f">> {e}")
    for url_src_tag in self.urls:
        arrays_match = []
        try:
            # Protocol-relative URLs ("//host/x.js") need an explicit scheme.
            if url_src_tag[0:2] == "//":
                url_src_tag = "http:" + url_src_tag
            print(colors.colors.fg.blue + f"[INFO] Getting info from: {url_src_tag}" + colors.colors.reset)
            # Bug fix: User-Agent belongs in `headers=`; the original sent it
            # as the request body via `data=`, with a stray ':' in the key.
            # NOTE(review): verify=False disables TLS validation — kept as-is.
            r = requests.get(
                url_src_tag,
                verify=False,
                headers={'User-Agent': self.user_agent},
                stream=True,
            )
            content_save = r.content
            # Collect unique matches across all patterns, first-seen order.
            # (Original shadowed the pattern variable `v` with the match
            # variable `v`; renamed for clarity.)
            for pattern in regex_modules.REGEX_PATT.values():
                for match in re.findall(pattern, r.text):
                    if match not in arrays_match:
                        arrays_match.append(match)
            for url in arrays_match:
                if "aws" in url:
                    print(colors.colors.fg.red + f"[AWS INFO] {url}" + colors.colors.reset)
                elif self.host in url:
                    print(colors.colors.fg.orange + f"[DOMAIN INFO] {url}" + colors.colors.reset)
                elif self.name_target in url:
                    print(colors.colors.fg.orange + f"[NAME INFO] {url}" + colors.colors.reset)
                else:
                    print(colors.colors.fg.blue + f"[INFO URL] {url}" + colors.colors.reset)
        except requests.exceptions.ConnectionError as e:
            # Bug fix: catch requests' ConnectionError (the builtin type never
            # matched requests failures), and skip the save step below — the
            # original fell through and wrote a stale or undefined
            # `content_save` after a failed fetch.
            print(
                f">> Error while save content from \n{url_src_tag} \n {e}")
            continue
        try:
            # Derive a filesystem-safe name from the URL, truncated to
            # 10 characters as in the original.
            name_save_url_tag = url_src_tag.replace(".", "_")
            name_save_url_tag = name_save_url_tag.replace("//", "_")
            name_save_url_tag = name_save_url_tag.replace("/", "_")[0:10]
            path_save = current_dir + "/" + dir_name + "/" + name_save_url_tag
            with open(path_save + ".js", 'wb') as f:
                f.write(content_save)
        except FileNotFoundError as e:
            print(f">> Error while saving JS content to parse \n {e}")