# Stdlib multiprocessing is sufficient here: `fun` is a top-level, picklable
# function, so the third-party dill-based `multiprocess` fork is not needed.
from multiprocessing import Manager, Process


def fun(d, l):
    """Mutate the shared dict and list in place (Manager-proxy demo).

    Parameters
    ----------
    d : mutable mapping
        Receives three demo entries (int, int, and float keys).
    l : mutable sequence
        Reversed in place.
    """
    d[1] = '1'
    d[2] = 2
    d[0.25] = None
    l.reverse()


if __name__ == '__main__':
    # Manager proxies let the child process's mutations be seen by the parent.
    manager = Manager()
    d = manager.dict()
    l = manager.list(range(10))
    p = Process(target=fun, args=(d, l))
    p.start()
    p.join()
    # Fixed: the original used Python 2 `print d` / `print l` statements,
    # which are syntax errors on Python 3.
    print(d)
    print(l)
def osint(self,organization,domain,files,ext,scope_file,aws,aws_fixes,html, screenshots,graph,nuke,whoxy_limit,typo,unsafe):
    """
    The OSINT toolkit:

    This is ODIN's primary module. ODIN will take the tagret organization, domain, and other data
    provided and hunt for information. On the human side, ODIN looks for employee names, email addresses,
    and social media profiles. Names and emails are cross-referenced with HaveIBeenPwned, Twitter's API,
    and search engines to collect additional information.

    ODIN also uses various tools and APIs to collect information on the provided IP addresses and domain
    names, including things like DNS and IP address history.

    View the wiki for the full details, reporting information, and lists of API keys.

    Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:

    * Single Address: 8.8.8.8

    * Basic CIDR: 8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK: 8.8.8.8_8.8.8.10
    """
    # Banner and version information for the interactive session
    click.clear()
    click.secho(asciis.print_art(),fg="magenta")
    click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
    click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.",fg="green")
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    # NOTE(review): a __main__ guard inside a method is unusual -- presumably it
    # protects the multiprocessing Process spawns from re-import; confirm intent.
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        rev_domain_list = manager.list()
        # Create reporter object and generate lists of everything, just IP addresses, and just domains
        browser = helpers.setup_headless_chrome(unsafe)
        report = reporter.Reporter(organization,report_path,output_report,browser)
        report.create_tables()
        scope,ip_list,domain_list = report.prepare_scope(ip_list,domain_list,scope_file,domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        # Phase 1 jobs
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain,))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list,rev_domain_list,organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Hunter",
                                target=report.create_domain_report_table,
                                args=(organization,scope,ip_list,domain_list,rev_domain_list,whoxy_limit))
        jobs.append(domain_report)
        # Phase 2 jobs
        shodan_report = Process(name="Shodan Hunter",
                                target=report.create_shodan_table,
                                args=(ip_list,domain_list))
        more_jobs.append(shodan_report)
        # Optional phase 2 jobs, gated on the user's command line flags
        if typo:
            lookalike_report = Process(name="Lookalike Domain Reviewer",
                                       target=report.create_lookalike_table,
                                       args=(organization,domain))
            more_jobs.append(lookalike_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path,browser))
            more_jobs.append(take_screenshots)
        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_metadata_table,
                                   args=(domain,ext,report_path))
            more_jobs.append(files_report)
        # Phase 3 jobs
        cloud_report = Process(name="Cloud Hunter",
                               target=report.create_cloud_table,
                               args=(organization,domain,aws,aws_fixes))
        even_more_jobs.append(cloud_report)
        # Process the lists of jobs in phases, starting with phase 1
        click.secho("[+] Beginning initial discovery phase! This could take some time...",fg="green")
        for job in jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in jobs:
            job.join()
        # Wait for phase 1 and then begin phase 2 jobs
        click.secho("[+] Initial discovery is complete! Proceeding with additional queries...",fg="green")
        for job in more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in more_jobs:
            job.join()
        # Wait for phase 2 and then begin phase 3 jobs
        click.secho("[+] Final phase: checking the cloud and web services...",fg="green")
        for job in even_more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done, so close out the SQLIte3 database connection
        report.close_out_reporting()
        click.secho("[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report),fg="green")
        # Perform additional tasks depending on the user's command line options
        if graph:
            graph_reporter = grapher.Grapher(output_report)
            click.secho("[+] Loading ODIN database file {} for conversion to Neo4j".format(output_report),fg="green")
            if nuke:
                # Optionally wipe the Neo4j database before conversion
                if click.confirm(click.style("[!] You set the --nuke option. This wipes out all nodes for a \
fresh start. Proceed?",fg="red"),default=True):
                    try:
                        graph_reporter.clear_neo4j_database()
                        click.secho("[+] Database successfully wiped!\n",fg="green")
                    except Exception as error:
                        click.secho("[!] Failed to clear the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                        click.secho("L.. Details: {}".format(error),fg="red")
                else:
                    click.secho("[!] You can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options.",fg="red")
            # Conversion is attempted regardless of whether a nuke happened
            try:
                graph_reporter.convert()
            except Exception as error:
                click.secho("[!] Failed to convert the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                click.secho("L.. Details: {}".format(error),fg="red")
        if html:
            click.secho("\n[+] Creating the HTML report using {}.".format(output_report),fg="green")
            try:
                html_reporter = htmlreporter.HTMLReporter(organization,report_path + "/html_report/",output_report)
                html_reporter.generate_full_report()
            except Exception as error:
                click.secho("[!] Failed to create the HTML report!",fg="red")
                click.secho("L.. Details: {}".format(error),fg="red")
def osint(self, organization, domain, files, ext, delete, scope_file, aws, aws_fixes, verbose, html, screenshots):
    """
    The OSINT toolkit:\n

    This is ODIN's primary module. ODIN will take the tagret organization, domain, and other data
    provided and hunt for information. On the human side, ODIN looks for employee names,
    email addresses, and social media profiles. Names and emails are cross-referenced with
    HaveIBeenPwned, Twitter's API, and search engines to collect additional information.\n

    ODIN also uses various tools and APIs to collect information on the provided IP addresses
    and domain names, including things like DNS and IP address history.\n

    View the README for the full detailsand lists of API keys!
    """
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))
    if verbose:
        # Warn that verbose mode can produce a large volume of RDAP output
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    # NOTE(review): a __main__ guard inside a method is unusual -- presumably it
    # protects the multiprocessing Process spawns from re-import; confirm intent.
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(output_report)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(scope, ip_list, domain_list, verbose))
        jobs.append(domain_report)
        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)
        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)
        if files:
            # NOTE(review): unlike later revisions, the file hunter runs in the
            # FIRST phase here (appended to `jobs`, not `more_jobs`).
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path, verbose))
            jobs.append(files_report)
        # Run the three phases in order, joining each queue before the next
        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()
        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()
        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done; close out the reporting database
        report.close_out_reporting()
        print(
            green("[+] Job's done! Your results are in {}.".format(
                output_report)))
        if html:
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, html, screenshots, graph, nuke, whoxy_limit):
    """
    The OSINT toolkit:\n

    This is ODIN's primary module. ODIN will take the target organization, domain, and other data
    provided and hunt for information. On the human side, ODIN looks for employee names,
    email addresses, and social media profiles. Names and emails are cross-referenced with
    HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

    ODIN also uses various tools and APIs to collect information on the provided IP addresses
    and domain names, including things like DNS and IP address history.

    View the README for the full details and lists of API keys!

    Note: If providing a scope file, acceptable IP addresses/ranges include:

    * Single Address: 8.8.8.8

    * Basic CIDR: 8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK: 8.8.8.8_8.8.8.10
    """
    click.clear()
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))
    # FIX: the original set verbose = None and then tested `if verbose:` before
    # printing a warning -- an unreachable (dead) branch. The branch is removed;
    # the variable is kept because create_foca_table() still receives it.
    verbose = None
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    # Guard the Process spawns so they only run from the entry-point module
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(report_path, output_report)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(organization, scope, ip_list,
                                      domain_list, whoxy_limit))
        jobs.append(domain_report)
        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)
        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)
        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path,
                                         verbose))
            more_jobs.append(files_report)
        # Run the three phases in order, joining each queue before the next
        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()
        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()
        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done; close out the reporting database
        report.close_out_reporting()
        print(
            green(
                "[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report)))
        if graph:
            graph_reporter = grapher.Grapher(output_report)
            # FIX: .format() was previously applied to green()'s return value
            # instead of the message template; fill the placeholder first.
            print(
                green("[+] Loading ODIN database file {} for conversion to Neo4j"
                      .format(output_report)))
            if nuke:
                # Optionally wipe the Neo4j database before conversion
                confirm = input(
                    red("\n[!] You set the --nuke option. This wipes out all nodes "
                        "for a fresh start. Proceed? (Y\\N) "))
                if confirm.lower() == "y":
                    graph_reporter.clear_neo4j_database()
                    print(green("[+] Database successfully wiped!\n"))
                    graph_reporter.convert()
                else:
                    print(
                        red("[!] Then you can convert your database to a graph database later. "
                            "Run lib/grapher.py with the appropriate options."))
            else:
                graph_reporter.convert()
        if html:
            print(
                green("\n[+] Creating the HTML report using {}.".format(
                    output_report)))
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
def perm_test(self, nperm, npr=1):
    """
    Performs permutation testing on residual matrix SVD.

    The rows of the residual matrix are first permuted. Then get_tks is
    called to calculate explained variance ratios and these tks are compared
    to the values from the actual residual matrix. A running total is kept
    for the number of times the explained variance from the permuted matrix
    exceeds that from the original matrix, and significance is estimated by
    dividing these totals by the number of permutations. Permutation testing
    is multiprocessed (npr > 1) to decrease calculation times.

    Parameters
    ----------
    nperm : int
        Number of permutations to be tested.
    npr : int
        Number of processors to be used.

    Attributes
    ----------
    sigs : array
        Estimated significances for each batch effect.
    """

    def single_it(rseed):
        """
        Single iteration of permutation testing.

        Permutes each row of the residual matrix independently, recomputes
        the tks for the permuted matrix and compares them to the originals.

        Parameters
        ----------
        rseed : int
            Random seed.

        Returns
        -------
        out : arr
            Indicator (0/1) per component of whether the permuted explained
            variance ratio exceeded the actual one.
        """
        rstate = np.random.RandomState(rseed * 100)
        rstar = np.copy(self.res)
        for i in range(rstar.shape[0]):
            rstate.shuffle(rstar[i, :])
        resstar = self.get_res(rstar)
        tkstar = self.get_tks(resstar)
        # Vectorized replacement for the original per-component loop:
        # 1.0 where the permuted tk exceeds the actual tk, else 0.0.
        return (np.asarray(tkstar) > np.asarray(self.tks)).astype(float)

    if int(npr) > 1:
        # Workers return their results through imap_unordered, and only the
        # parent process appends to `output`, so the original Manager-backed
        # list and Manager lock were unnecessary -- a plain list suffices.
        # FIX: also removed a leftover `time.sleep(40)` that stalled every
        # multiprocessed run for 40 seconds after the pool finished.
        output = []
        with Pool(int(npr)) as pool:
            pbar = tqdm(total=int(nperm), desc='permuting', position=0,
                        smoothing=0)
            for result in pool.imap_unordered(single_it, range(int(nperm))):
                pbar.update(1)
                output.append(result)
            pbar.close()
            pool.close()
            pool.join()
    else:
        output = []
        with tqdm(total=int(nperm), desc='permuting', position=0,
                  smoothing=0) as pbar:
            for seed in range(int(nperm)):
                output.append(single_it(seed))
                pbar.update(1)
    # Fraction of permutations in which each component's tk was exceeded
    self.sigs = np.sum(np.asarray(output), axis=0) / float(nperm)
class Storage(object):
    '''
    Storage system

    Block-addressed storage backed by a single memory-mapped file, with a
    Manager-shared job queue so separate reader processes can serve block
    requests. Free space is tracked as (start_address, n_free_blocks) tuples
    and allocated with a worst-fit policy.
    '''

    def __init__(self):
        # The given page size
        self._PAGE_SIZE = 4096
        # The given size for data blocks
        self._BLOCK_SIZE = 1 * self._PAGE_SIZE
        # Meta data about datasets
        self._dataset_table = {}
        # Read/write head position
        self._position = 0
        # Manager for concurrency
        self.manager = Manager()
        # Job-queue for reading data
        self.job_queue = self.manager.list()
        # Data queues, keyed by a per-request random id
        self.data_queues = self.manager.dict()
        # Path to storage file
        _path = 'data.data'
        # Size of storage (Default 200 mb)
        self._SIZE = 4096 * 256 * 200
        # Amount of blocks
        self._BLOCKS = math.floor(self._SIZE / self._BLOCK_SIZE)
        # Check whether a storage file exists, else create one
        if not os.path.exists(_path):
            print('Writing storage file')
            f = open(_path, 'w+b')
            f.write(b'?' * self._SIZE)
            # FIX: was `f.close` (attribute access, never called), so the
            # file was never flushed/closed before being re-opened for mmap.
            f.close()
        # Open storage and create a MMAP
        try:
            storage = open(_path, 'a+b')
        except Exception:
            print('Cannot open storage file!')
        # Create MMAP to file
        self.datamap = mmap.mmap(storage.fileno(), 0)
        # Free space vector: (first block address, number of free blocks)
        self.free_space = [(0, self._BLOCKS)]

    def _write_data(self, address, data_block, flush=True):
        ''' Writes a data block to the page at the given address '''
        print('ยค Writing data block at ' + str(address))
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address
            # Write the block
            self.datamap.write(bytes(data_block, 'utf-8'))
        except Exception:
            print('! Could not write data block to ' + str(address) +
                  '. Not enough space.')
        # Flush the written data to the file
        if flush:
            try:
                self.datamap.flush()
            except Exception:
                print("Cannot flush data with mmap!")

    def _read_block(self, address):
        ''' Reads one page of data from the given address '''
        print('+ Reading data from ' + str(address))
        data = ''
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address
            # Read the data
            data = self.datamap.read(self._PAGE_SIZE)
        except Exception:
            print('Could not read data block from ' + str(address))
        return data

    def _worst_fit(self, n_blocks):
        ''' Data block allocation using worst-fit '''
        # FIX: the original took sorted(..., key=size)[0], which is the
        # SMALLEST segment (best-fit at best); worst-fit must allocate from
        # the largest free segment.
        #! Faster to use max-heaps
        largest_segment = max(self.free_space, key=lambda seg: seg[1])
        blocks_amount = largest_segment[1]
        assert blocks_amount >= n_blocks
        # Construct a list of free datablocks
        free_blocks = []
        current_block = largest_segment[0]
        for _ in range(n_blocks):
            free_blocks.append(current_block)
            current_block += self._BLOCK_SIZE
        # Remove the free space and add the remaining
        # free space after allocation
        self.free_space.remove(largest_segment)
        self.free_space.append((current_block, blocks_amount - n_blocks))
        return free_blocks

    def _request_blocks(self, n_blocks):
        return self._worst_fit(n_blocks)

    def get_size(self, dataset_id):
        ''' Get the amount of blocks in a dataset '''
        return self._dataset_table[dataset_id].size

    def append_data(self, dataset_id, data_block, address, flush=True):
        ''' Append data to an existing dataset '''
        # Check if there is any more allocated space for the dataset
        if self._dataset_table[dataset_id].space_left():
            # Write data block and increment size
            self._write_data(address, data_block, flush)
            self._dataset_table[dataset_id].size += 1
        return address

    def add_dataset(self, dataset_id, dataset, size=None):
        ''' Add a new dataset to the storage '''
        # Add metadata about the dataset
        if size:
            current_size = size
        else:
            current_size = len(dataset)
        self._dataset_table[dataset_id] = Dataset(current_size)
        requested_blocks = self._request_blocks(current_size)
        assert len(requested_blocks) >= len(dataset)
        # Write the data blocks to a file; flushing is deferred until the
        # whole dataset is written.
        block_index = 0
        for data_block in dataset:
            self.append_data(dataset_id, data_block,
                             requested_blocks[block_index], flush=False)
            self._dataset_table[dataset_id].append_block_index(
                requested_blocks[block_index])
            block_index += 1
        try:
            self.datamap.flush()
        except Exception:
            print("Cannot flush data with mmap!")

    def read_data(self, dataset_id, data_queue):
        ''' Run the execution-queue for a given dataset '''
        # Generate a random id (6 characters) to key this request's queue
        data_id = ''.join(random.SystemRandom().choice(
            string.ascii_uppercase + string.digits) for _ in range(6))
        dataset = self._dataset_table[dataset_id]
        self.data_queues[data_id] = data_queue
        for address in dataset.datablocks:
            self.job_queue.append((address, data_id))
        return dataset.datablocks

    def reader(self):
        '''
        A reading process, which serves data block requests from read_data
        '''
        while True:
            # Sort the list of jobs by their address
            jobs = sorted(self.job_queue, key=lambda x: x[0])
            try:
                # Find the job with the closest highest address (elevator-style
                # sweep from the current head position)
                (address, data_id) = next(
                    x for x in jobs if x[0] >= self._position)
                # Read the data from disc
                data = self._read_block(address)
                # Serve data to the requesting process
                self.data_queues[data_id].put(data)
                # Remove the job from the list
                self.job_queue.remove((address, data_id))
            except Exception:
                # No jobs found. Start from position 0.
                self._position = 0
                time.sleep(0.01)
def main(settings, rescue_running=None):
    """
    Perform the primary loop of building, submitting, monitoring, and
    analyzing jobs.

    This function works via a loop of calls to thread.process and
    thread.interpret for each thread that hasn't terminated, until either the
    global termination criterion is met or all the individual threads have
    completed.

    Parameters
    ----------
    settings : argparse.Namespace
        Settings namespace object
    rescue_running : list
        List of threads passed in from handle_loop_exception, containing
        running threads. If given, setup is skipped and the function proceeds
        directly to the main loop. (FIX: default changed from a shared
        mutable `[]` to the None sentinel; behavior is unchanged because the
        argument is only read.)

    Returns
    -------
    exit_message : str
        A message indicating the status of ATESA at the end of main
    """
    if rescue_running is None:
        rescue_running = []

    if not rescue_running:
        # Implement resample
        if settings.job_type in ['aimless_shooting', 'committor_analysis'] \
                and settings.resample:
            # Store settings object in the working directory for compatibility
            # with analysis/utility scripts
            if not settings.dont_dump:
                temp_settings = copy.deepcopy(settings)   # initialize temporary copy of settings to modify
                temp_settings.__dict__.pop('env')         # env attribute is not picklable
                pickle.dump(temp_settings,
                            open(settings.working_directory + '/settings.pkl', 'wb'),
                            protocol=2)
            # Run resampling
            if settings.job_type == 'aimless_shooting':
                utilities.resample(settings, partial=False,
                                   full_cvs=settings.full_cvs)
                if settings.information_error_checking:
                    # update info_err.out if called for by settings
                    information_error.main()
            elif settings.job_type == 'committor_analysis':
                resample_committor_analysis.resample_committor_analysis(settings)
            return 'Resampling complete'

        # Make working directory if it does not exist, handling overwrite and
        # restart as needed
        if os.path.exists(settings.working_directory):
            if settings.overwrite and not settings.restart:
                # a kludge to avoid removing cvs.txt when wiping the directory
                if os.path.exists(settings.working_directory + '/cvs.txt'):
                    if os.path.exists('ATESA_TEMP_CVS.txt'):
                        raise RuntimeError(
                            'tried to create temporary file ATESA_TEMP_CVS.txt in directory: '
                            + os.getcwd() +
                            ', but it already exists. Please move, delete, or rename it.')
                    shutil.move(settings.working_directory + '/cvs.txt',
                                'ATESA_TEMP_CVS.txt')
                shutil.rmtree(settings.working_directory)
                os.mkdir(settings.working_directory)
                if os.path.exists('ATESA_TEMP_CVS.txt'):
                    # continuation of aforementioned kludge
                    shutil.move('ATESA_TEMP_CVS.txt',
                                settings.working_directory + '/cvs.txt')
            elif not settings.restart and glob.glob(
                    settings.working_directory + '/*') == [
                        settings.working_directory + '/cvs.txt']:
                # Occurs when restart = False, overwrite = False, and auto_cvs
                # is used
                pass
            elif not settings.restart:
                raise RuntimeError(
                    'Working directory ' + settings.working_directory +
                    ' already exists, but overwrite '
                    '= False and restart = False. Either change one of these two settings or choose a '
                    'different working directory.')
        else:
            if not settings.restart:
                os.mkdir(settings.working_directory)
            else:
                raise RuntimeError('Working directory ' +
                                   settings.working_directory +
                                   ' does not yet exist, but '
                                   'restart = True.')

        # Store settings object in the working directory for compatibility
        # with analysis/utility scripts
        if os.path.exists(settings.working_directory + '/settings.pkl'):
            # for checking for need for resample later
            previous_settings = pickle.load(
                open(settings.working_directory + '/settings.pkl', 'rb'))
            settings.previous_cvs = previous_settings.cvs
            try:
                settings.previous_information_error_max_dims = \
                    previous_settings.information_error_max_dims
            except AttributeError:
                pass
            try:
                settings.previous_information_error_lmax_string = \
                    previous_settings.information_error_lmax_string
            except AttributeError:
                pass
        if not settings.dont_dump:
            temp_settings = copy.deepcopy(settings)  # initialize temporary copy of settings to modify
            # env attribute is not picklable (update: maybe no longer true,
            # but doesn't matter)
            temp_settings.__dict__.pop('env')
            pickle.dump(temp_settings,
                        open(settings.working_directory + '/settings.pkl', 'wb'),
                        protocol=2)

        # Build or load threads
        allthreads = init_threads(settings)
        # Move runtime to working directory
        os.chdir(settings.working_directory)
        running = allthreads.copy()   # to be pruned later by thread.process()
        attempted_rescue = False      # to keep track of general error handling below
    else:
        allthreads = pickle.load(
            open(settings.working_directory + '/restart.pkl', 'rb'))
        running = rescue_running
        attempted_rescue = True

    # Initialize threads with first process step
    try:
        if not rescue_running:
            # if rescue_running, this step has already finished and we just
            # want the while loop
            for thread in allthreads:
                running = thread.process(running, settings)
    except Exception as e:
        if settings.restart:
            print('The following error occurred while attempting to initialize threads from restart.pkl. It may be '
                  'corrupted.')
        raise e

    try:
        if settings.job_type == 'aimless_shooting' and len(
                os.sched_getaffinity(0)) > 1:
            # Initialize Manager for shared data across processes; this is
            # necessary because multiprocessing is being retrofitted to code
            # designed for serial processing, but it works!
            manager = Manager()
            # Setup Managed allthreads list
            managed_allthreads = []
            for thread in allthreads:
                thread_dict = thread.__dict__
                thread_history_dict = thread.history.__dict__
                managed_thread = Thread()
                managed_thread.history = manager.Namespace()
                managed_thread.__dict__.update(thread_dict)
                managed_thread.history.__dict__.update(thread_history_dict)
                managed_allthreads.append(managed_thread)
            allthreads = manager.list(managed_allthreads)
            # Setup Managed settings Namespace. FIX: the original built and
            # exec()'d attribute-assignment strings per key; setattr performs
            # the identical __setattr__ call on the Namespace proxy without
            # dynamic code evaluation.
            settings_dict = settings.__dict__
            managed_settings = manager.Namespace()
            for key in settings_dict.keys():
                setattr(managed_settings, key, settings_dict[key])
            # Distribute processes among available core Pool
            with get_context("spawn").Pool(len(os.sched_getaffinity(0))) as p:
                p.starmap(
                    main_loop,
                    zip(itertools.repeat(managed_settings),
                        itertools.repeat(allthreads),
                        [[thread] for thread in allthreads]))
        else:
            main_loop(settings, allthreads, running)
    except AttributeError:
        # os.sched_getaffinity raises AttributeError on non-UNIX systems.
        main_loop(settings, allthreads, running)

    jobtype = factory.jobtype_factory(settings.job_type)
    jobtype.cleanup(settings)

    return 'ATESA run exiting normally'
def writeEventsToCsv(self, urls, processedUrlsFName, batchSize=20):
    # Scrapes sales events for each listing url and appends them to
    # self.eventFile, recording completed urls in processedUrlsFName so an
    # interrupted run can resume. Supports a multiprocessed 'parallel' mode
    # and a sequential 'series' mode (selected by self.eventMode).
    # NOTE(review): this function uses xrange, so it targets Python 2; under
    # Python 2 the percentage computation below (urlsWithEvents /
    # origNumUrls * 100.0) is integer division and always yields 0.0 --
    # confirm intended interpreter.
    numUrls = len(urls)
    origNumUrls = numUrls
    urlsWithEvents = 0
    totalEvents = 0
    processedListings = 0
    numTimeouts = 0
    # Resume support: drop any urls already recorded in the processed file
    try:
        with open(processedUrlsFName, 'r') as pus:
            pUrls = list(set(pus.read().split('\r\n')))
            logging.info(
                'Already processed {0} of {1} urls. Picking up where we'
                ' left off.'.format(len(pUrls), numUrls))
            urls = [url for url in urls if url not in pUrls]
            numUrls = len(urls)
    except IOError:
        # No processed-urls file yet: first run, nothing to skip
        pass
    with open(processedUrlsFName, 'a+') as pus:
        pUrls_writer = csv.writer(pus)
        with open(self.eventFile, 'a+') as f:
            writer = csv.writer(f)
            sttm = time.time()
            if self.eventMode == 'parallel':
                # Split the urls into fixed-size batches, each served by a
                # pool of worker processes plus one csv-writer process
                batches = [
                    urls[x:x + batchSize]
                    for x in xrange(0, len(urls), batchSize)]
                for b, batch in enumerate(batches):
                    logging.info('Starting batch {0} of {1}'.format(
                        b + 1, len(batches)))
                    manager = Manager()
                    batchQueue = Queue()
                    batchTimeoutList = manager.list()
                    batchProcessedUrls = manager.list()
                    batchEventQueue = manager.Queue()
                    batchEventsSaved = manager.Value('i', 0)
                    jobs = []
                    # One queued work item per url in the batch
                    for i, url in enumerate(batch):
                        batchQueue.put(
                            [self.eventMode, url, batchEventQueue,
                             batchProcessedUrls, batchTimeoutList])
                    for i in range(len(batch)):
                        proc = Process(
                            target=self.eventWorker, args=(batchQueue,))
                        proc.start()
                        jobs.append(proc)
                    writeProc = Process(
                        target=self.writeToCsvWorker, args=(
                            batchEventQueue, batchEventsSaved))
                    time.sleep(2)
                    writeProc.start()
                    for j, job in enumerate(jobs):
                        # 5 seconds per url for each process before timeout
                        job.join(max(60, 5 * len(batch)))
                        if job.is_alive():
                            job.terminate()
                            logging.info(
                                'Subprocess {0} of {1} timed out'.format(
                                    j + 1, min(24, len(batch))))
                    writeProc.join(max(60, 8 * len(batch)))
                    # Fold this batch's counters into the run totals
                    totalEvents += batchEventsSaved.value
                    processedListings += len(batch)
                    for url in set(list(batchProcessedUrls)):
                        pUrls_writer.writerow([url])
                    urlsWithEvents += len(set(list(batchProcessedUrls)))
                    numTimeouts += len(set(list(batchTimeoutList)))
                    durMins, minsLeft = self.timeElapsedLeft(
                        sttm, b + 1, len(batches))
                    logging.info(
                        'Saved {0} new events from {1} of {2} listings. '
                        '\nEstimated time to '
                        'completion: ~{3} min.'.format(
                            batchEventsSaved.value, len(batchProcessedUrls),
                            len(batch), minsLeft))
                    # Kill any chrome processes left behind by the workers
                    os.system(
                        "ps aux | grep chrome | awk ' { print $2 } ' |"
                        " xargs kill -9")
            elif self.eventMode == 'series':
                # Sequential mode: scrape each url in turn in this process
                for i, url in enumerate(urls):
                    numEvents = 0
                    events = self.getEventsFromListingUrl(
                        self.eventMode, url, None, urls, [])
                    if events is None:
                        durMins, minsLeft = self.timeElapsedLeft(
                            sttm, i + 1, numUrls)
                        logging.info(
                            'No sales events scraped from listing'
                            ' {0} of {1}. Check url: {2}. {3} min.'
                            'elapsed. {4} min. remaining.'.format(
                                i + 1, numUrls, url, durMins, minsLeft))
                        continue
                    for event in events:
                        totalEvents += 1
                        numEvents += 1
                        writer.writerow(event)
                    urlsWithEvents += 1
                    pUrls_writer.writerow([url])
                    durMins, minsLeft = self.timeElapsedLeft(
                        sttm, i, numUrls)
                    if (i + 1) % 1 == 0:
                        logging.info(
                            'Scraped {0} sales events from listing {1}'
                            ' of {2}. Scraped {3} total sales events in'
                            ' {4} min. Estimated time to completion:'
                            ' ~{5} min.'.format(
                                numEvents, i + 1, numUrls, totalEvents,
                                durMins, minsLeft))
            else:
                raise ValueError(
                    'Must specify valid event scraping '
                    'mode: ["parallel", "series"]')
    # Final run summary; -999 is the sentinel for "no urls to process"
    if numUrls > 0:
        self.pctUrlsWithEvents = round(
            urlsWithEvents / origNumUrls * 100.0, 1)
    else:
        self.pctUrlsWithEvents = -999
    logging.info('#' * 100)
    logging.info('#' * 100)
    logging.info(
        'Scraped events from {0} of {1} ({2}%) urls.'.format(
            urlsWithEvents, numUrls, self.pctUrlsWithEvents).center(
                90, ' ').center(100, '#').upper())
    logging.info(
        ('{0} of {1} urls timed out while scraping events.'.format(
            numTimeouts, numUrls).upper().center(90, ' ').center(
                100, '#')))
    logging.info(
        ('Saved {0} events to {1}'.format(
            totalEvents, self.eventFile).upper().center(
                90, ' ').center(100, '#')))
    logging.info('#' * 100)
    logging.info('#' * 100)
def osint(self, organization, domain, files, ext, scope_file, aws, aws_fixes,
          html, screenshots, graph, nuke, whoxy_limit, typo, unsafe):
    """
The OSINT toolkit:

This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the wiki for the full details, reporting information, and lists of API keys.

Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    # Banner and run header.
    click.clear()
    click.secho(asciis.print_art(), fg="magenta")
    click.secho("\tRelease v{}, {}".format(VERSION, CODENAME), fg="magenta")
    click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.", fg="green")
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    # NOTE(review): a __main__ guard inside a method is unusual — presumably
    # it protects the multiprocessing Process spawns below from re-executing
    # on import; confirm against how this module is invoked.
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        rev_domain_list = manager.list()
        # Create reporter object and generate lists of everything, just IP addresses, and just domains
        browser = helpers.setup_headless_chrome(unsafe)
        report = reporter.Reporter(organization, report_path, output_report, browser)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        # Phase 1 jobs
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list, rev_domain_list, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Hunter",
                                target=report.create_domain_report_table,
                                args=(organization, scope, ip_list, domain_list,
                                      rev_domain_list, whoxy_limit))
        jobs.append(domain_report)
        # Phase 2 jobs
        shodan_report = Process(name="Shodan Hunter",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        # Optional phase 2 jobs, gated on the corresponding CLI flags.
        if typo:
            lookalike_report = Process(name="Lookalike Domain Reviewer",
                                       target=report.create_lookalike_table,
                                       args=(organization, domain))
            more_jobs.append(lookalike_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, browser))
            more_jobs.append(take_screenshots)
        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_metadata_table,
                                   args=(domain, ext, report_path))
            more_jobs.append(files_report)
        # Phase 3 jobs
        cloud_report = Process(name="Cloud Hunter",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)
        # Process the lists of jobs in phases, starting with phase 1.
        # Each phase starts all of its processes, then joins them all, so a
        # phase only begins once the previous phase has fully completed.
        click.secho("[+] Beginning initial discovery phase! This could take some time...", fg="green")
        for job in jobs:
            click.secho("[+] Starting new process: {}".format(job.name), fg="green")
            job.start()
        for job in jobs:
            job.join()
        # Wait for phase 1 and then begin phase 2 jobs
        click.secho("[+] Initial discovery is complete! Proceeding with additional queries...", fg="green")
        for job in more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name), fg="green")
            job.start()
        for job in more_jobs:
            job.join()
        # Wait for phase 2 and then begin phase 3 jobs
        click.secho("[+] Final phase: checking the cloud and web services...", fg="green")
        for job in even_more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name), fg="green")
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done, so close out the SQLIte3 database connection
        report.close_out_reporting()
        click.secho("[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report), fg="green")
        # Perform additional tasks depending on the user's command line options
        if graph:
            # Convert the SQLite report into a Neo4j graph database.
            graph_reporter = grapher.Grapher(output_report)
            click.secho("[+] Loading ODIN database file {} for conversion to Neo4j".format(output_report), fg="green")
            if nuke:
                # Destructive: wiping Neo4j requires explicit confirmation.
                if click.confirm(click.style("[!] You set the --nuke option. This wipes out all nodes for a \
fresh start. Proceed?", fg="red"), default=True):
                    try:
                        graph_reporter.clear_neo4j_database()
                        click.secho("[+] Database successfully wiped!\n", fg="green")
                    except Exception as error:
                        click.secho("[!] Failed to clear the database! Check the Neo4j console and \
your configuration and try running grapher.py again.", fg="red")
                        click.secho("L.. Details: {}".format(error), fg="red")
                else:
                    click.secho("[!] You can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options.", fg="red")
            # Conversion runs whether or not the wipe happened.
            try:
                graph_reporter.convert()
            except Exception as error:
                click.secho("[!] Failed to convert the database! Check the Neo4j console and \
your configuration and try running grapher.py again.", fg="red")
                click.secho("L.. Details: {}".format(error), fg="red")
        if html:
            # Render the collected results as a static HTML report.
            click.secho("\n[+] Creating the HTML report using {}.".format(output_report), fg="green")
            try:
                html_reporter = htmlreporter.HTMLReporter(organization, report_path + "/html_report/", output_report)
                html_reporter.generate_full_report()
            except Exception as error:
                click.secho("[!] Failed to create the HTML report!", fg="red")
                click.secho("L.. Details: {}".format(error), fg="red")