def slexy(vars_dict):
    """
    Scraping function for slexy. This one is almost identical to ix.io, with the exception of having
    some tables to dig through. It also has a heavier rate limit, so a minimum limiter is enforced

    :param vars_dict: dict of necessary variables returned from config()
    :return: nothing
    """
    lib.print_status("Starting slexy run...")
    # Connect to archive and get parameters for individual documents
    soup = BeautifulSoup(lib.connect("https://slexy.org/recent", verify_ssl=False).text, 'html.parser')
    table = soup.find("table", attrs={'id': "recent_pastes"})
    parameters = set([a['href'] for a in table.findAll('a', href=True)])
    # Loop through parameters
    for param in parameters:
        # Connect and fetch the raw text
        document_soup = BeautifulSoup(lib.connect(f'https://slexy.org{param}', verify_ssl=False).text, 'html.parser')
        document_table = document_soup.findAll("table")
        raw_parameter = [a['href'] for a in document_table[1].findAll('a', href=True) if 'raw' in a['href']][0]
        unprocessed = BeautifulSoup(lib.connect(f'https://slexy.org{raw_parameter}', verify_ssl=False).text, 'html.parser')
        # Pass to archive engine
        # We remove the /view/ from the param for file naming purposes
        identifier = f'slexy-{param.split("/view/")[1]}'
        lib.archive_engine(str(unprocessed), identifier, vars_dict)
        sleep(5) if vars_dict['limiter'] < 5 else sleep(vars_dict['limiter'])
    lib.print_success("All slexy pastes processed.")
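# The collectors in this module do all of their HTTP fetching through lib.connect(), which is not
# shown in this section. The sketch below is a minimal stand-in, assuming it is a thin wrapper
# around requests.get() that retries on transient failures and accepts the verify_ssl keyword the
# slexy collector passes; the real helper may differ.
import requests

def connect(url, verify_ssl=True, retries=3, delay=5):
    """Hypothetical sketch of lib.connect(): fetch a URL, retrying on connection errors."""
    for attempt in range(retries):
        try:
            # callers read .text from the returned response object
            return requests.get(url, verify=verify_ssl, timeout=30)
        except requests.exceptions.RequestException:
            sleep(delay)  # back off briefly before retrying
    raise ConnectionError(f"Could not reach {url} after {retries} attempts")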
def pastebin(vars_dict):
    """
    This function fetches the pastebin archive and all the pastes in it. It passes them to
    archive_engine(), then sleeps per the time specified by vars_dict['limiter']

    :param vars_dict: dict of necessary variables returned from config()
    :return: Nothing
    """
    # Fetch the pastebin public archive
    lib.print_status("Starting pastebin run...")
    arch_page = lib.connect("https://pastebin.com/archive")
    arch_soup = BeautifulSoup(arch_page.text, 'html.parser')
    sleep(2)
    # Parse the archive HTML to get the individual document URLs
    table = arch_soup.find("table", attrs={'class': "maintable"})
    parameters = [a['href'] for a in table.findAll('a', href=True) if 'archive' not in a['href']]
    # For each paste listed, connect and pass the text to archive_engine()
    for param in parameters:
        param = param[1:]  # removes the leading forward slash
        document_page = lib.connect(f"https://pastebin.com/{param}")
        document_soup = BeautifulSoup(document_page.text, 'html.parser')
        # Fetch the raw text and pass to archive_engine()
        unprocessed = document_soup.find('textarea').contents[0]
        identifier = f'pastebin-{param}'
        lib.archive_engine(unprocessed, identifier, vars_dict)
        sleep(vars_dict['limiter'])
    lib.print_success("All pastebin pastes processed.")
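# Note: pastebin() above pulls the paste body out of the <textarea> element on the HTML page. An
# alternative (not what the function above does) is pastebin's plain-text endpoint at
# https://pastebin.com/raw/<id>, which returns the paste body directly and skips the HTML parsing:
def fetch_pastebin_raw(param):
    """Illustrative helper: fetch a single paste as plain text via the /raw/ endpoint."""
    return lib.connect(f"https://pastebin.com/raw/{param}").text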
def archive_engine(prescan_text, proch, vars_dict):
    """
    Scans a fetched document with the compiled YARA rules (if enabled) and writes it to the workpath,
    naming the file after the matched string and the paste's identifier

    :param prescan_text: the raw text of the document
    :param proch: the identifier used for file naming
    :param vars_dict: dict of necessary variables returned from config()
    :return: nothing
    """
    if vars_dict['yara_scanning'] is True:
        matches = vars_dict['search_rules'].match(data=prescan_text)
        if matches:
            if matches[0].rule == 'blacklist':
                lib.print_status(f"Blacklisted term detected: [{((matches[0]).strings[0])[2].decode('UTF-8')}]")
            else:
                if matches[0].rule == 'b64Artifacts':
                    lib.print_success(f"Base64 Artifact Found: [{((matches[0]).strings[0])[2].decode('UTF-8')}]")
                    decoded_content = b64decode(prescan_text)
                    if ((matches[0]).strings[0])[2].decode('UTF-8') == "H4sI":
                        # "H4sI" is the base64 encoding of the gzip magic bytes, so the decoded payload is gzip-compressed
                        decompressed_string = gzip.decompress(decoded_content).decode('utf-8', errors='replace')
                        with codecs.open(f"{vars_dict['workpath']}{proch}.file", 'w+', 'utf-8') as savefile:
                            savefile.write(decompressed_string)
                    else:
                        # decoded base64 can be arbitrary binary, so write it out in binary mode,
                        # named after the matched string's identifier
                        with open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[1]}_{proch}.txt", 'wb+') as savefile:
                            savefile.write(decoded_content)
                elif matches[0].rule == 'powershellArtifacts':
                    lib.print_success(f"Powershell Artifact Found: [{((matches[0]).strings[0])[2].decode('UTF-8')}]")
                    with codecs.open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[2].decode('UTF-8')}_{proch}.ps1", 'w+', 'utf-8') as savefile:
                        savefile.write(prescan_text)
                elif matches[0].rule == 'keywords':
                    lib.print_success(f"Keyword found: [{((matches[0]).strings[0])[2].decode('UTF-8')}]")
                    with codecs.open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[2].decode('UTF-8')}_{proch}.txt", 'w+', 'utf-8') as savefile:
                        savefile.write(prescan_text)
                else:
                    with codecs.open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[2].decode('UTF-8')}_{proch}.txt", 'w+', 'utf-8') as savefile:
                        savefile.write(prescan_text)
        else:
            with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
                savefile.write(prescan_text)
    else:
        with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
            savefile.write(prescan_text)
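# archive_engine() above branches on the rule names 'blacklist', 'b64Artifacts', 'powershellArtifacts',
# and 'keywords', and it reads match.strings entries as (offset, identifier, data) tuples, which is the
# yara-python tuple API (versions before 4.3.0). The snippet below is a self-contained illustration of
# those fields using a placeholder rule; the project's real rules live under yara_rules/.
import yara

_demo_rules = yara.compile(source='''
rule keywords
{
    strings:
        $kw = "password"
    condition:
        $kw
}
''')

def demo_match(text):
    """Show the fields archive_engine() reads from a yara-python match (pre-4.3 tuple string API)."""
    matches = _demo_rules.match(data=text)
    if matches:
        offset, identifier, data = matches[0].strings[0]
        print(matches[0].rule, identifier, data.decode('UTF-8'))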
def main(args):
    lib.print_title("""
     _________________________________________
    [                                         ]
    [                                         ]
    [            Welcome to BinBot            ]
    [             Made by Mili-NT             ]
    [                                         ]
    [_________________________________________]

    Note: To load a config file, pass it as an argument
    """)
    # If a filepath is passed, it is passed to config().
    # If not, an invalid path "" is passed, which results in manual setup
    vars_dict = config(args[1]) if len(args) > 1 else config("")
    try:
        # This creates a thread for every service enabled
        runs = 0
        while True:
            with ThreadPoolExecutor(max_workers=len(vars_dict['services'])) as executor:
                for service in vars_dict['services']:
                    executor.submit(collectors.services[service], vars_dict)
            runs += 1
            # This check is a little weird, but because True == 1 evaluates to True,
            # isinstance(vars_dict['stop_input'], int) wouldn't work here.
            if str(vars_dict['stop_input']) != 'True':
                if runs >= vars_dict['stop_input']:
                    lib.print_success("Runs Complete, Operation Finished...")
                    exit()
            lib.print_status(f"All services scraped, cooling down for {vars_dict['cooldown']} seconds")
            sleep(vars_dict['cooldown'] / 2)
            lib.print_status("Halfway through cooldown.")
            sleep(vars_dict['cooldown'] / 2)
            lib.print_status("Continuing...")
    except KeyboardInterrupt:
        lib.print_status("Operation cancelled...")
        exit()
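# main() expects sys.argv-style arguments with an optional config file path at index 1, so running
# `python binbot.py myconfig.json` loads a saved config and running it with no argument triggers
# manual setup. A likely entry point (an assumption; the real one lives elsewhere in the repo):
if __name__ == '__main__':
    from sys import argv
    main(argv)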
def Non_API_Search(vars_dict):
    """
    Legacy pastebin scraping routine (non-API): it walks the public archive, strips the paste text
    out of the page HTML, and hands it to archive_engine()

    :param vars_dict: dict of necessary variables returned from config()
    :return: nothing
    """
    arch_runs = 0
    while True:
        if arch_runs > 0:
            lib.print_status(f"Runs: {arch_runs}")
            # stop_input is either True (run forever) or an integer run count
            if arch_runs >= vars_dict['stop_input'] and vars_dict['stop_input'] is not True:
                lib.print_success("Runs Complete, Operation Finished...")
                exit()
            else:
                lib.print_status(f"Pastes fetched, cooling down for {vars_dict['cooldown']} seconds...")
                sleep(vars_dict['cooldown'] / 2)
                lib.print_status("Halfway through cooldown")
                sleep(vars_dict['cooldown'] / 2)
                lib.print_status("Resuming...")
        if arch_runs < vars_dict['stop_input'] or vars_dict['stop_input'] is True:
            arch_page = archive_connect()
            arch_soup = BeautifulSoup(arch_page.text, 'html.parser')
            sleep(2)
            lib.print_status("Getting archived pastes...")
            if 'access denied' in arch_page.text:
                lib.print_error("IP temporarily suspended, pausing until the ban is lifted. Estimated time: one hour...")
                sleep(vars_dict['cooldown'])
                lib.print_status("Process resumed...")
                continue
            lib.print_status("Finding params...")
            table = arch_soup.find("table", class_="maintable")  # Fetch the table of recent pastes
            while True:
                try:
                    tablehrefs = table.findAll('a', href=True)  # Find the <a> tags for every paste
                    break
                except AttributeError:
                    lib.print_error("IP temporarily suspended, pausing until the ban is lifted. Estimated time: one hour...")
                    sleep(vars_dict['cooldown'])
                    lib.print_error("Process resumed...")
                    continue
            for h in tablehrefs:
                proch = (h['href']).replace("/", "")  # fetch the URL param for each paste
                lib.print_success("params fetched...")
                lib.print_status(f"Acting on param {proch}...")
                full_archpage, full_arch_url = parameter_connect(proch)
                item_soup = BeautifulSoup(full_archpage.text, 'html.parser')
                unprocessed = item_soup.find('textarea')  # Fetch the raw text in the paste.
                taglist = [
                    '<textarea class="paste_code" id="paste_code" name="paste_code" onkeydown="return catchTab(this,event)">',
                    '<textarea class="paste_textarea" id="paste_code" name="paste_code" onkeydown="return catchTab(this,event)" rows="10">',
                    '</textarea>',
                ]
                for tag in taglist:
                    unprocessed = str(unprocessed).replace(tag, "")  # process the raw text by removing html tags
                archive_engine(unprocessed, proch, vars_dict)
                arch_runs += 1
                sleep(vars_dict['limiter'])
                continue
        else:
            lib.print_success("Operation Finished...")
            break
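# Non_API_Search() relies on archive_connect() and parameter_connect(), which are not shown in this
# section. The sketches below are assumptions about their shape: plain requests wrappers returning
# the archive page, and the full paste page plus its URL, respectively.
import requests

def archive_connect():
    """Hypothetical sketch: fetch the pastebin public archive page."""
    return requests.get("https://pastebin.com/archive", timeout=30)

def parameter_connect(proch):
    """Hypothetical sketch: fetch a single paste page and return (response, url)."""
    full_arch_url = f"https://pastebin.com/{proch}"
    return requests.get(full_arch_url, timeout=30), full_arch_url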
def ixio(vars_dict):
    """
    This is the scraping function for ix.io. It works very similarly to the pastebin() function,
    and fetches a list of documents from an archive, processes them, and cools down

    :param vars_dict: dict of necessary variables returned from config()
    :return: nothing
    """
    lib.print_status("Starting ix.io run...")
    # Connect to archive and gather individual document parameters
    soup = BeautifulSoup(lib.connect("http://ix.io/user/").text, 'html.parser')
    # The parameter is sanitized (has its leading and trailing forward slashes removed) during this comprehension
    parameters = set([a['href'].replace('/', '') for a in soup.findAll('a', href=True)])
    # Loop through parameters and get raw text
    for param in parameters:
        document_soup = BeautifulSoup(lib.connect(f'http://ix.io/{param}').text, 'html.parser')
        # Pass raw text to archive engine
        identifier = f'ixio-{param}'
        lib.archive_engine(str(document_soup), identifier, vars_dict)
        sleep(vars_dict['limiter'])
    lib.print_success("All ix.io pastes processed.")
def archive_engine(prescan_text, proch, vars_dict):
    """
    YARA-scans a fetched document (if scanning is enabled), logs any match with a timestamp,
    and writes the document to the workpath

    :param prescan_text: the raw text of the document
    :param proch: the identifier used for file naming
    :param vars_dict: dict of necessary variables returned from config()
    :return: nothing
    """
    if vars_dict['yara_scanning'] is True:
        matches = vars_dict['search_rules'].match(data=prescan_text)
        if matches:
            if matches[0].rule == 'blacklist':
                lib.print_status(f"Blacklisted term detected: [{((matches[0]).strings[0])[2].decode('UTF-8')}] at [{datetime.now().strftime('%X')}]")
            else:
                if matches[0].rule == 'b64Artifacts':
                    lib.print_success(f"Base64 Artifact Found: [{((matches[0]).strings[0])[2].decode('UTF-8')}] at [{datetime.now().strftime('%X')}]")
                    # the string identifier (e.g. "$b64") is already a str, so no decoding is needed
                    with codecs.open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[1]}_{proch}.b64", 'w+', 'utf-8') as savefile:
                        savefile.write(prescan_text)
                elif matches[0].rule == 'powershellArtifacts':
                    lib.print_success(f"Powershell Artifact Found: [{((matches[0]).strings[0])[2].decode('UTF-8')}] at [{datetime.now().strftime('%X')}]")
                    with codecs.open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[2].decode('UTF-8')}_{proch}.ps1", 'w+', 'utf-8') as savefile:
                        savefile.write(prescan_text)
                elif matches[0].rule == 'keywords':
                    lib.print_success(f"Keyword found: [{((matches[0]).strings[0])[2].decode('UTF-8')}] at [{datetime.now().strftime('%X')}]")
                    with codecs.open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[2].decode('UTF-8')}_{proch}.txt", 'w+', 'utf-8') as savefile:
                        savefile.write(prescan_text)
                else:
                    with codecs.open(f"{vars_dict['workpath']}{((matches[0]).strings[0])[2].decode('UTF-8')}_{proch}.txt", 'w+', 'utf-8') as savefile:
                        savefile.write(prescan_text)
        else:
            with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
                savefile.write(prescan_text)
    else:
        with codecs.open(f"{vars_dict['workpath']}{proch}.txt", 'w+', 'utf-8') as savefile:
            savefile.write(prescan_text)
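# The lib.print_* helpers used throughout (print_title, print_status, print_success, print_error,
# print_input) are defined elsewhere. Judging by the raw ANSI escapes used in config() and
# manual_setup(), they are probably thin wrappers that colour console output; the markers and
# colours below are assumptions, not the project's actual implementation.
def print_status(message):
    print(f"\x1b[94m[*]\x1b[0m {message}")         # blue status marker (assumed convention)

def print_success(message):
    print(f"\x1b[1;32;40m[+]\x1b[0m {message}")     # green success marker (assumed convention)

def print_error(message):
    print(f"\x1b[91m[!]\x1b[0m {message}")          # red error marker (assumed convention)

def print_input(prompt):
    return input(f"\x1b[94m[>]\x1b[0m {prompt}: ")  # coloured prompt returning the user's input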
def manual_setup():
    """
    Interactive fallback setup used when no config file is supplied; returns vars_dict
    """
    # Save path
    while True:
        workpath = lib.print_input("Enter the path you wish to save text documents to (enter curdir for current directory)")
        if workpath.lower() == 'curdir':
            if name.lower() == 'nt':
                workpath = getcwd()
            else:
                workpath = syspath[0]
        if path.isdir(workpath):
            lib.print_success("Valid Path...")
            if workpath.endswith('\\') or workpath.endswith('/'):
                pass
            else:
                if name.lower() == 'nt':
                    workpath = workpath + '\\'
                else:
                    workpath = workpath + '/'
            break
        else:
            lib.print_error("Invalid path, check input...")
            continue
    # Looping
    while True:
        try:
            stopinput_input = lib.print_input("Run in a constant loop? [y]/[n]")
            if stopinput_input.lower() == 'y':
                stop_input = True
            elif stopinput_input.lower() == 'n':
                stop_input = int(lib.print_input("Enter the amount of successful pulls you wish to make (enter 0 for infinite)"))
            else:
                lib.print_error("Invalid Input.")
                continue
            # Limiter and Cooldown
            try:
                limiter = int(lib.print_input("Enter the request limit you wish to use (recommended: 5)"))
            except ValueError:
                limiter = 5
            try:
                cooldown = int(lib.print_input("Enter the cooldown between IP bans/Archive scrapes (recommended: 1200)"))
            except ValueError:
                cooldown = 1200
            break
        except ValueError:
            lib.print_error("Invalid Input.")
            continue
    # YARA toggle
    while True:
        yara_choice = lib.print_input("Enable scanning documents using YARA rules? [y/n]")
        if yara_choice.lower() not in ['y', 'n', 'yes', 'no']:
            lib.print_error("Invalid Input.")
            continue
        elif yara_choice.lower() in ['y', 'yes']:
            yara_scanning = True
            break
        elif yara_choice.lower() in ['n', 'no']:
            yara_scanning = False
            break
    # Yara Compiling
    if yara_scanning is True:
        yara_dir = f"{getcwd()}/yara_rules"
        search_rules = yara.compile(
            filepaths={f.replace(".yar", ""): path.join(f'{yara_dir}/general_rules/', f)
                       for f in listdir(f'{yara_dir}/general_rules/')
                       if path.isfile(path.join(f'{yara_dir}/general_rules/', f)) and f.endswith(".yar")})
        binary_rules = yara.compile(
            filepaths={f.replace(".yar", ""): path.join(f'{yara_dir}/binary_rules/', f)
                       for f in listdir(f'{yara_dir}/binary_rules/')
                       if path.isfile(path.join(f'{yara_dir}/binary_rules/', f)) and f.endswith(".yar")})
    else:
        search_rules = []
        binary_rules = []
    # Saving
    while True:
        savechoice = lib.print_input('Save configuration to file for repeated use? [y]/[n]')
        if savechoice.lower() == 'n':
            break
        elif savechoice.lower() == 'y':
            configname = lib.print_input("Enter the config name (no extension)")
            try:
                with open(configname + '.ini', 'w+') as cfile:
                    cfile.write(f"""[initial_vars]
workpath = {workpath}
stop_input = {stop_input}
limiter = {limiter}
cooldown = {cooldown}
yara_scanning = {yara_scanning}""")
                break
            except Exception as e:
                print(f"{e}")
                break
    vars_dict = {
        'workpath': workpath,
        'stop_input': stop_input,
        'limiter': limiter,
        'cooldown': cooldown,
        'yara_scanning': yara_scanning,
        'search_rules': search_rules,
        'binary_rules': binary_rules,
    }
    try:
        print("\n")
        for x in vars_dict.keys():
            if x != 'search_rules' and x != 'binary_rules':
                if name == 'nt':
                    print(f"[{x}]: {str(vars_dict[x])}")
                    print("---------------------")
                else:
                    print(f"\x1b[94m[{x}]\x1b[0m: " + f"\x1b[1;32;40m{str(vars_dict[x])}\x1b[0m")
                    print("\x1b[94m---------------------\x1b[0m")
    finally:
        print("\n")
    return vars_dict
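# manual_setup() saves its settings as an .ini file with an [initial_vars] section. The loader for
# that format is not shown here (the config() function below reads JSON instead); a minimal sketch
# of reading the .ini back, assuming standard configparser semantics:
from configparser import ConfigParser

def load_ini_config(configpath):
    """Hypothetical sketch: read the [initial_vars] section written by manual_setup()."""
    parser = ConfigParser()
    parser.read(configpath)
    section = parser['initial_vars']
    return {
        'workpath': section.get('workpath'),
        'stop_input': section.get('stop_input'),  # stored as text; may be 'True' or a run count
        'limiter': section.getint('limiter', fallback=5),
        'cooldown': section.getint('cooldown', fallback=1200),
        'yara_scanning': section.getboolean('yara_scanning', fallback=False),
    }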
def config(configpath):
    """
    :param configpath: path to config file, if it is blank or non-existent, it runs manual setup
    :return: vars_dict, a dictionary containing all the variables needed to run the main functions
    """
    # Manual Setup:
    if path.isfile(configpath) is False:
        # Saving options (workpath and saveall):
        while True:
            workpath = lib.print_input("Enter the path you wish to save text documents to (enter curdir for current directory)")
            workpath = syspath[0] if workpath.lower() == 'curdir' else workpath
            if path.isdir(workpath):
                lib.print_success("Valid Path...")
                workpath = workpath if any([workpath.endswith('\\'), workpath.endswith('/')]) else f'{workpath}/'
            else:
                lib.print_error("Invalid path, check input...")
                continue
            savechoice = input("Save all documents (Enter N to only save matched documents)? [y/n]: ")
            saveall = True if savechoice.lower() in ['y', 'yes'] else False
            break
        # Services to Enable (services):
        while True:
            for x in collectors.service_names.keys():
                lib.print_status(f"[{x}]: {collectors.service_names[x]}")
            service_choice = lib.print_input("Enter the number(s) of the services you wish to scrape, "
                                             "separated by a comma").replace(" ", '').split(',')
            services = [collectors.service_names[int(x)] for x in service_choice
                        if int(x) in collectors.service_names.keys()]
            services = list(collectors.service_names.values()) if services == [] else services
            break
        # Looping, Limiter, and Cooldown Input (stop_input, limiter, cooldown):
        while True:
            loop_input = lib.print_input("Run in a constant loop? [y]/[n]")
            if loop_input.lower() == 'y':
                stop_input = True
            else:
                stop_input = int(lib.print_input("Enter the amount of times you want to fetch the archives: "))
                # If they enter 0 or below pastes to fetch, run in an infinite loop:
                stop_input = True if stop_input <= 0 else stop_input
            # Limiter and Cooldown
            limiter = int(lib.print_input("Enter the request limit you wish to use (recommended: 5)"))
            cooldown = int(lib.print_input("Enter the cooldown between IP bans/Archive scrapes (recommended: 600)"))
            # If no values are entered, select the recommended
            limiter = 5 if any([limiter <= 0, isinstance(limiter, int) is False]) else limiter
            cooldown = 600 if any([cooldown <= 0, isinstance(cooldown, int) is False]) else cooldown
            break
        # YARA (yara_scanning)
        while True:
            yara_choice = lib.print_input("Enable scanning documents using YARA rules? [y/n]")
            if yara_choice.lower() not in ['y', 'n', 'yes', 'no']:
                lib.print_error("Invalid Input.")
                continue
            elif yara_choice.lower() in ['y', 'yes']:
                yara_scanning = True
            elif yara_choice.lower() in ['n', 'no']:
                yara_scanning = False
            break
        # Building Settings Dict:
        vars_dict = {
            'workpath': workpath,
            'stop_input': stop_input,
            'limiter': limiter,
            'cooldown': cooldown,
            'yara_scanning': yara_scanning,
            'services': services,
            'saveall': saveall,
        }
        # Saving
        savechoice = lib.print_input('Save configuration to file for repeated use? [y]/[n]')
        if savechoice.lower() == 'y':
            configname = lib.print_input("Enter the config name (no extension)")
            configname = configname.split(".")[0] if '.json' in configname else configname
            json.dump(vars_dict, open(f"{configname}.json", 'w'))
    # Loading Config:
    else:
        vars_dict = json.load(open(configpath))
    # YARA Compilation:
    # YARA rules aren't written to files because they can't be serialized
    if vars_dict['yara_scanning']:
        vars_dict['search_rules'] = yara.compile(
            filepaths={f.split('.')[0]: path.join(f'{syspath[0]}/yara_rules/general_rules/', f)
                       for f in listdir(f'{syspath[0]}/yara_rules/general_rules/')
                       if path.isfile(path.join(f'{syspath[0]}/yara_rules/general_rules/', f))
                       and (f.endswith(".yar") or f.endswith(".yara"))})
        vars_dict['binary_rules'] = yara.compile(
            filepaths={f.split('.')[0]: path.join(f'{syspath[0]}/yara_rules/binary_rules/', f)
                       for f in listdir(f'{syspath[0]}/yara_rules/binary_rules/')
                       if path.isfile(path.join(f'{syspath[0]}/yara_rules/binary_rules/', f))
                       and (f.endswith(".yar") or f.endswith(".yara"))})
    # Display and Return:
    try:
        print("\n")
        for x in vars_dict.keys():
            if x != 'search_rules' and x != 'binary_rules':
                print(f"\x1b[94m[{x}]\x1b[0m: " + f"\x1b[1;32;40m{str(vars_dict[x])}\x1b[0m")
                print("\x1b[94m---------------------\x1b[0m")
    finally:
        print("\n")
    return vars_dict
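# When config() saves a configuration, it serializes vars_dict to JSON before the compiled YARA rules
# are added (they are recompiled on every load because they can't be serialized). A saved file would
# look roughly like this, with illustrative values:
#
# {
#     "workpath": "/home/user/binbot_output/",
#     "stop_input": true,
#     "limiter": 5,
#     "cooldown": 600,
#     "yara_scanning": true,
#     "services": ["pastebin", "ixio", "slexy"],
#     "saveall": false
# }
#
# Loading it again is just: vars_dict = config("myconfig.json")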