from multiprocess import Manager, Process


def fun(d, l):
    d[1] = '1'
    d[2] = 2
    d[0.25] = None
    l.reverse()


if __name__ == '__main__':
    manager = Manager()

    d = manager.dict()
    l = manager.list(range(10))

    p = Process(target=fun, args=(d, l))
    p.start()
    p.join()

    print(d)
    print(l)
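
The proxies returned by Manager() can be shared freely between processes, but read-modify-write operations on them are not atomic. The following is a minimal sketch, not part of the original example (the names bump, shared, and lock are illustrative), extending the snippet above with a manager.Lock() so several workers can increment the same shared counter without losing updates.

from multiprocess import Manager, Process


def bump(shared, lock):
    # A read-modify-write on a DictProxy must be guarded explicitly.
    for _ in range(1000):
        with lock:
            shared['count'] = shared.get('count', 0) + 1


if __name__ == '__main__':
    manager = Manager()
    shared = manager.dict()
    lock = manager.Lock()
    workers = [Process(target=bump, args=(shared, lock)) for _ in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(shared['count'])  # expected: 4000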
Example #2
def osint(self,organization,domain,files,ext,scope_file,aws,aws_fixes,html,
          screenshots,graph,nuke,whoxy_limit,typo,unsafe):
    """
The OSINT toolkit:

This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the wiki for the full details, reporting information, and lists of API keys.

Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    click.clear()
    click.secho(asciis.print_art(),fg="magenta")
    click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
    click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.",fg="green")
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        rev_domain_list = manager.list()
        # Create reporter object and generate lists of everything, just IP addresses, and just domains
        browser = helpers.setup_headless_chrome(unsafe)
        report = reporter.Reporter(organization,report_path,output_report,browser)
        report.create_tables()
        scope,ip_list,domain_list = report.prepare_scope(ip_list,domain_list,scope_file,domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        # Phase 1 jobs
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain,))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list,rev_domain_list,organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Hunter",
                                target=report.create_domain_report_table,
                                args=(organization,scope,ip_list,domain_list,rev_domain_list,whoxy_limit))
        jobs.append(domain_report)
        # Phase 2 jobs
        shodan_report = Process(name="Shodan Hunter",
                                target=report.create_shodan_table,
                                args=(ip_list,domain_list))
        more_jobs.append(shodan_report)
        if typo:
            lookalike_report = Process(name="Lookalike Domain Reviewer",
                                      target=report.create_lookalike_table,
                                      args=(organization,domain))
            more_jobs.append(lookalike_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path,browser))
            more_jobs.append(take_screenshots)
        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_metadata_table,
                                   args=(domain,ext,report_path))
            more_jobs.append(files_report)
        # Phase 3 jobs
        cloud_report = Process(name="Cloud Hunter",
                               target=report.create_cloud_table,
                               args=(organization,domain,aws,aws_fixes))
        even_more_jobs.append(cloud_report)
        # Process the lists of jobs in phases, starting with phase 1
        click.secho("[+] Beginning initial discovery phase! This could take some time...",fg="green")
        for job in jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in jobs:
            job.join()
        # Wait for phase 1 and then begin phase 2 jobs
        click.secho("[+] Initial discovery is complete! Proceeding with additional queries...",fg="green")
        for job in more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in more_jobs:
            job.join()
        # Wait for phase 2 and then begin phase 3 jobs
        click.secho("[+] Final phase: checking the cloud and web services...",fg="green")
        for job in even_more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done, so close out the SQLite3 database connection
        report.close_out_reporting()
        click.secho("[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report),fg="green")
        # Perform additional tasks depending on the user's command line options
        if graph:
            graph_reporter = grapher.Grapher(output_report)
            click.secho("[+] Loading ODIN database file {} for conversion to Neo4j".format(output_report),fg="green")
            if nuke:
                if click.confirm(click.style("[!] You set the --nuke option. This wipes out all nodes for a \
fresh start. Proceed?",fg="red"),default=True):
                    try:
                        graph_reporter.clear_neo4j_database()
                        click.secho("[+] Database successfully wiped!\n",fg="green")
                    except Exception as error:
                        click.secho("[!] Failed to clear the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                        click.secho("L.. Details: {}".format(error),fg="red")
                else:
                    click.secho("[!] You can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options.",fg="red")
                try:
                    graph_reporter.convert()
                except Exception as error:
                    click.secho("[!] Failed to convert the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                    click.secho("L.. Details: {}".format(error),fg="red")
        if html:
            click.secho("\n[+] Creating the HTML report using {}.".format(output_report),fg="green")
            try:
                html_reporter = htmlreporter.HTMLReporter(organization,report_path + "/html_report/",output_report)
                html_reporter.generate_full_report()
            except Exception as error:
                click.secho("[!] Failed to create the HTML report!",fg="red")
                click.secho("L.. Details: {}".format(error),fg="red")
Example #3
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, verbose, html, screenshots):
    """
The OSINT toolkit:\n
This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.\n
ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.\n
View the README for the full details and lists of API keys!
    """
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(output_report)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)

        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(scope, ip_list, domain_list, verbose))
        jobs.append(domain_report)

        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)

        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)

        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)

        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path,
                                         verbose))
            jobs.append(files_report)

        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()

        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()

        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()

        report.close_out_reporting()
        print(
            green("[+] Job's done! Your results are in {}.".format(
                output_report)))

        if html:
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
Example #4
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, html, screenshots, graph, nuke, whoxy_limit):
    """
The OSINT toolkit:\n
This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the README for the full details and lists of API keys!

Note: If providing a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    click.clear()
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    verbose = None

    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(report_path, output_report)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)

        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(organization, scope, ip_list,
                                      domain_list, whoxy_limit))
        jobs.append(domain_report)

        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)

        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)

        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)

        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path,
                                         verbose))
            more_jobs.append(files_report)

        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()

        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()

        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()

        report.close_out_reporting()
        print(
            green(
                "[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report)))

        if graph:
            graph_reporter = grapher.Grapher(output_report)
            print(
                green(
                    "[+] Loading ODIN database file {} for conversion to Neo4j"
                ).format(output_report))

            if nuke:
                confirm = input(
                    red("\n[!] You set the --nuke option. This wipes out all nodes \
for a fresh start. Proceed? (Y\\N) "))
                if confirm.lower() == "y":
                    graph_reporter.clear_neo4j_database()
                    print(green("[+] Database successfully wiped!\n"))
                    graph_reporter.convert()
                else:
                    print(
                        red("[!] Then you can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options."))
            else:
                graph_reporter.convert()

        if html:
            print(
                green("\n[+] Creating the HTML report using {}.".format(
                    output_report)))
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
Example #5
    def perm_test(self, nperm, npr=1):
        """
        Performs permutation testing on residual matrix SVD.

        The rows of the residual matrix are first permuted, then get_tks is called to calculate
        explained variance ratios, and these tks are compared to the values from the actual
        residual matrix. A running total is kept of the number of times the explained variance
        from the permuted matrix exceeds that from the original matrix, and significance is
        estimated by dividing these totals by the number of permutations. The permutation
        testing is multiprocessed to decrease calculation time.
        
        Parameters
        ----------
        nperm : int
            Number of permutations to be tested.
        npr : int
            Number of processors to be used.

        Attributes
        ----------
        sigs : array
            Estimated significances for each batch effect.

        """
        def single_it(rseed):
            """
            Single iteration of permutation testing.
            Permutes residual matrix, calculates new tks for permuted matrix and compares to original tks.
            Parameters
            ----------
            rseed : int
                Random seed.
            Returns
            -------
            out : arr
                Counts of number of times permuted explained variance ratio exceeded explained variance ratio from actual residual matrix.
            """

            rstate = np.random.RandomState(rseed * 100)
            rstar = np.copy(self.res)
            out = np.zeros(len(self.tks))
            for i in range(rstar.shape[0]):
                rstate.shuffle(rstar[i, :])
            resstar = self.get_res(rstar)
            tkstar = self.get_tks(resstar)
            for m in range(len(self.tks)):
                if tkstar[m] > self.tks[m]:
                    out[m] += 1
            return out

        if int(npr) > 1:
            mgr = Manager()
            output = mgr.list()
            l = mgr.Lock()
            with Pool(int(npr)) as pool:
                pbar = tqdm(total=int(nperm),
                            desc='permuting',
                            position=0,
                            smoothing=0)
                imap_it = pool.imap_unordered(single_it, range(int(nperm)))
                for x in imap_it:
                    pbar.update(1)
                    with l:
                        output.append(x)
            pbar.close()
            pool.close()
            pool.join()
            self.sigs = np.sum(np.asarray(output), axis=0) / float(nperm)
            time.sleep(40)
        else:
            output = []
            with tqdm(total=int(nperm),
                      desc='permuting',
                      position=0,
                      smoothing=0) as pbar:
                for x in range(int(nperm)):
                    output.append(single_it(x))
                    pbar.update(1)
            self.sigs = np.sum(np.asarray(output), axis=0) / float(nperm)
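
Note that single_it is defined inside perm_test and handed to Pool.imap_unordered; the standard-library multiprocessing pickler cannot serialize such nested functions, which is one reason the multiprocess fork (which serializes with dill) works here. Below is a self-contained sketch of the same Manager-list-plus-Pool pattern with illustrative names (square, results), assuming the multiprocess package; the lock mirrors the original even though only the parent process appends in this sketch.

from multiprocess import Manager, Pool


def square(x):
    return x * x


if __name__ == '__main__':
    mgr = Manager()
    results = mgr.list()
    lock = mgr.Lock()
    with Pool(4) as pool:
        # imap_unordered streams results back as workers finish them.
        for value in pool.imap_unordered(square, range(100)):
            with lock:
                results.append(value)
    print(sum(results))  # expected: 328350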
Example #6
class Storage(object):
    '''
    Storage system
    '''
    def __init__(self):
        # The given page size
        self._PAGE_SIZE = 4096

        # The given size for data blocks
        self._BLOCK_SIZE = 1 * self._PAGE_SIZE

        # Meta data about datasets
        self._dataset_table = {}

        # Read/write head position
        self._position = 0

        # Manager for concurrency
        self.manager = Manager()

        # Job-queue for reading data
        self.job_queue = self.manager.list()

        # Data queues
        self.data_queues = self.manager.dict()

        # Path to storage file
        _path = 'data.data'

        # Size of storage (Default 200 mb)
        self._SIZE = 4096 * 256 * 200

        # Amount of blocks
        self._BLOCKS = math.floor(self._SIZE / self._BLOCK_SIZE)

        # Check whether a storage file exists, else create one
        if not os.path.exists(_path):
            print('Writing storage file')
            f = open(_path, 'w+b')
            f.write(b'?' * self._SIZE)
            f.close()

        # Open storage and create a MMAP
        try:
            storage = open(_path, 'a+b')
        except:
            print('Cannot open storage file!')

        # Create MMAP to file
        self.datamap = mmap.mmap(storage.fileno(), 0)

        # Free space vector
        self.free_space = [(0, self._BLOCKS)]


    def _write_data(self, address, data_block, flush=True):
        '''
        Writes a data block to the page at the given address
        '''
        print('¤ Writing data block at ' + str(address))
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address

            # Write the block
            self.datamap.write(bytes(data_block, 'utf-8'))
        except:
            print('! Could not write data block to ' + str(address) + '. Not enough space.')

        # Flush the written data to the file
        if flush:
            try:
                self.datamap.flush()
            except:
                print("Cannot flush data with mmap!")
                pass


    def _read_block(self, address):
        '''
        Reads a data block from the given address
        '''
        print('+ Reading data from ' + str(address))
        data = ''
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address

            # Read the data
            data = self.datamap.read(self._PAGE_SIZE)
        except:
            print('Could not read data block from ' + str(address))

        return data


    def _worst_fit(self, n_blocks):
        '''
        Data block allocation using worst-fit
        '''
        # Get the largest free segment
        #! Faster to use max-heaps
        largest_segment = max(self.free_space, key=lambda x: x[1])
        blocks_amount = largest_segment[1]

        assert blocks_amount >= n_blocks

        # Construct a list of free datablocks
        free_blocks = []
        current_block = largest_segment[0]
        for _ in range(n_blocks):
            free_blocks.append(current_block)
            current_block += self._BLOCK_SIZE

        # Remove the free space and add the remaining
        # free space after allocation
        self.free_space.remove(largest_segment)
        self.free_space.append((current_block, blocks_amount - n_blocks))

        return free_blocks


    def _request_blocks(self, n_blocks):
        return self._worst_fit(n_blocks)


    def get_size(self, dataset_id):
        '''
        Get the amount of blocks in a dataset
        '''
        return self._dataset_table[dataset_id].size


    def append_data(self, dataset_id, data_block, address, flush=True):
        '''
        Append data to an existing dataset
        '''
        # Check if there is any more allocated space
        # for the dataset
        if self._dataset_table[dataset_id].space_left():
            # Write data block and increament size
            self._write_data(address, data_block, flush)
            self._dataset_table[dataset_id].size += 1
            return address


    def add_dataset(self, dataset_id, dataset, size=None):
        '''
        Add a new dataset to the storage
        '''
        # Add metadata about the dataset
        if size:
            current_size = size
        else:
            current_size = len(dataset)

        self._dataset_table[dataset_id] = Dataset(current_size)

        requested_blocks = self._request_blocks(current_size)

        assert len(requested_blocks) >= len(dataset)

        # Write the data blocks to a file
        block_index = 0
        for data_block in dataset:
            self.append_data(dataset_id, data_block, requested_blocks[block_index], flush=False)
            self._dataset_table[dataset_id].append_block_index(requested_blocks[block_index])
            block_index += 1

        try:
            self.datamap.flush()
        except:
            print("Cannot flush data with mmap!")
            pass

    def read_data(self, dataset_id, data_queue):
        '''
        Run the execution-queue for a given dataset
        '''
        # Generate a random id (6 characters)
        data_id = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(6))

        dataset = self._dataset_table[dataset_id]

        self.data_queues[data_id] = data_queue

        for address in dataset.datablocks:
            self.job_queue.append((address, data_id))

        return dataset.datablocks


    def reader(self):
        '''
        A reading process, which serves data blocks requests from read_data
        '''
        while True:
            # Sort the list of jobs by their address
            jobs = sorted(self.job_queue, key=lambda x: x[0])

            try:
                # Find the job with the closest highest address
                (address, data_id) = next(x for x in jobs if x[0] >= self._position)

                # Read the data from disc
                data = self._read_block(address)

                # Serve data to the requesting process
                self.data_queues[data_id].put(data)

                # Remove the job from the list
                self.job_queue.remove((address, data_id))
            except:
                # No jobs found. Start from position 0.
                self._position = 0
                time.sleep(0.01)
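
The scheduling rule in reader() is essentially a one-directional elevator scan: among the pending jobs it serves the one at the nearest address at or beyond the current head position, and wraps back to 0 when nothing lies ahead. Here is a minimal standalone sketch of just that rule; next_job is an illustrative name, not part of the class.

def next_job(jobs, position):
    # Pick the pending (address, data_id) pair closest ahead of the head.
    ahead = [job for job in jobs if job[0] >= position]
    return min(ahead, key=lambda job: job[0]) if ahead else None


print(next_job([(4096, 'a'), (0, 'b'), (8192, 'c')], 4097))  # (8192, 'c')
print(next_job([(4096, 'a'), (0, 'b')], 8192))               # None -> wrap to 0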
Example #7
def main(settings, rescue_running=[]):
    """
    Perform the primary loop of building, submitting, monitoring, and analyzing jobs.

    This function works via a loop of calls to thread.process and thread.interpret for each thread that hasn't
    terminated, until either the global termination criterion is met or all the individual threads have completed.

    Parameters
    ----------
    settings : argparse.Namespace
        Settings namespace object
    rescue_running : list
        List of threads passed in from handle_loop_exception, containing running threads. If given, setup is skipped and
        the function proceeds directly to the main loop.

    Returns
    -------
    exit_message : str
        A message indicating the status of ATESA at the end of main

    """

    if not rescue_running:
        # Implement resample
        if settings.job_type in ['aimless_shooting', 'committor_analysis'
                                 ] and settings.resample:
            # Store settings object in the working directory for compatibility with analysis/utility scripts
            if not settings.dont_dump:
                temp_settings = copy.deepcopy(
                    settings
                )  # initialize temporary copy of settings to modify
                temp_settings.__dict__.pop(
                    'env')  # env attribute is not picklable
                pickle.dump(temp_settings,
                            open(settings.working_directory + '/settings.pkl',
                                 'wb'),
                            protocol=2)
            # Run resampling
            if settings.job_type == 'aimless_shooting':
                utilities.resample(settings,
                                   partial=False,
                                   full_cvs=settings.full_cvs)
                if settings.information_error_checking:  # update info_err.out if called for by settings
                    information_error.main()
            elif settings.job_type == 'committor_analysis':
                resample_committor_analysis.resample_committor_analysis(
                    settings)
            return 'Resampling complete'

        # Make working directory if it does not exist, handling overwrite and restart as needed
        if os.path.exists(settings.working_directory):
            if settings.overwrite and not settings.restart:
                if os.path.exists(
                        settings.working_directory +
                        '/cvs.txt'):  # a kludge to avoid removing cvs.txt
                    if os.path.exists('ATESA_TEMP_CVS.txt'):
                        raise RuntimeError(
                            'tried to create temporary file ATESA_TEMP_CVS.txt in directory: '
                            + os.getcwd() +
                            ', but it already exists. Please move, delete, or rename it.'
                        )
                    shutil.move(settings.working_directory + '/cvs.txt',
                                'ATESA_TEMP_CVS.txt')
                shutil.rmtree(settings.working_directory)
                os.mkdir(settings.working_directory)
                if os.path.exists('ATESA_TEMP_CVS.txt'
                                  ):  # continuation of aforementioned kludge
                    shutil.move('ATESA_TEMP_CVS.txt',
                                settings.working_directory + '/cvs.txt')
            elif not settings.restart and glob.glob(
                    settings.working_directory +
                    '/*') == [settings.working_directory + '/cvs.txt']:
                # Occurs when restart = False, overwrite = False, and auto_cvs is used
                pass
            elif not settings.restart:
                raise RuntimeError(
                    'Working directory ' + settings.working_directory +
                    ' already exists, but overwrite '
                    '= False and restart = False. Either change one of these two settings or choose a '
                    'different working directory.')
        else:
            if not settings.restart:
                os.mkdir(settings.working_directory)
            else:
                raise RuntimeError('Working directory ' +
                                   settings.working_directory +
                                   ' does not yet exist, but '
                                   'restart = True.')

        # Store settings object in the working directory for compatibility with analysis/utility scripts
        if os.path.exists(
                settings.working_directory +
                '/settings.pkl'):  # for checking for need for resample later
            previous_settings = pickle.load(
                open(settings.working_directory + '/settings.pkl', 'rb'))
            settings.previous_cvs = previous_settings.cvs
            try:
                settings.previous_information_error_max_dims = previous_settings.information_error_max_dims
            except AttributeError:
                pass
            try:
                settings.previous_information_error_lmax_string = previous_settings.information_error_lmax_string
            except AttributeError:
                pass
        if not settings.dont_dump:
            temp_settings = copy.deepcopy(
                settings)  # initialize temporary copy of settings to modify
            temp_settings.__dict__.pop(
                'env'
            )  # env attribute is not picklable (update: maybe no longer true, but doesn't matter)
            pickle.dump(temp_settings,
                        open(settings.working_directory + '/settings.pkl',
                             'wb'),
                        protocol=2)

        # Build or load threads
        allthreads = init_threads(settings)

        # Move runtime to working directory
        os.chdir(settings.working_directory)

        running = allthreads.copy()  # to be pruned later by thread.process()
        attempted_rescue = False  # to keep track of general error handling below
    else:
        allthreads = pickle.load(
            open(settings.working_directory + '/restart.pkl', 'rb'))
        running = rescue_running
        attempted_rescue = True

    # Initialize threads with first process step
    try:
        if not rescue_running:  # if rescue_running, this step has already finished and we just want the while loop
            for thread in allthreads:
                running = thread.process(running, settings)
    except Exception as e:
        if settings.restart:
            print(
                'The following error occurred while attempting to initialize threads from restart.pkl. It may be '
                'corrupted.')
            #'If you haven\'t already done so, consider running verify_threads.py to remove corrupted threads from this file.'
        raise e

    try:
        if settings.job_type == 'aimless_shooting' and len(
                os.sched_getaffinity(0)) > 1:
            # Initialize Manager for shared data across processes; this is necessary because multiprocessing is being
            # retrofitted to code designed for serial processing, but it works!
            manager = Manager()

            # Setup Managed allthreads list
            managed_allthreads = []
            for thread in allthreads:
                thread_dict = thread.__dict__
                thread_history_dict = thread.history.__dict__
                managed_thread = Thread()
                managed_thread.history = manager.Namespace()
                managed_thread.__dict__.update(thread_dict)
                managed_thread.history.__dict__.update(thread_history_dict)
                managed_allthreads.append(managed_thread)
            allthreads = manager.list(managed_allthreads)

            # Setup Managed settings Namespace
            settings_dict = settings.__dict__
            managed_settings = manager.Namespace()
            # Need to explicitly update every key because of how the Managed Namespace works.
            # Calling exec is the best way to do this I could find. Updating managed_settings.__dict__ doesn't work.
            for key in settings_dict.keys():
                exec('managed_settings.' + key + ' = settings_dict[key]')

            # Distribute processes among available core Pool
            with get_context("spawn").Pool(len(os.sched_getaffinity(0))) as p:
                p.starmap(
                    main_loop,
                    zip(itertools.repeat(managed_settings),
                        itertools.repeat(allthreads),
                        [[thread] for thread in allthreads]))
        else:
            main_loop(settings, allthreads, running)
    except AttributeError:  # os.sched_getaffinity raises AttributeError on non-UNIX systems.
        main_loop(settings, allthreads, running)

    ## Deprecated thread pool
    # pool = ThreadPool(len(allthreads))
    # func = partial(main_loop, settings)
    # results = pool.map(func, [[thread] for thread in allthreads])

    jobtype = factory.jobtype_factory(settings.job_type)
    jobtype.cleanup(settings)

    return 'ATESA run exiting normally'
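
The exec() loop above copies each settings attribute onto the managed Namespace one at a time because the proxy does not expose a writable __dict__. An alternative sketch, assuming the same multiprocess Manager, uses setattr instead of building code strings; it covers attribute names that do not start with an underscore, which the Namespace proxy forwards to the shared object.

from argparse import Namespace

from multiprocess import Manager

if __name__ == '__main__':
    settings = Namespace(job_type='aimless_shooting', restart=False)
    manager = Manager()
    managed_settings = manager.Namespace()
    for key, value in vars(settings).items():
        # The proxy forwards assignment for non-underscore attribute names.
        setattr(managed_settings, key, value)
    print(managed_settings.job_type)  # aimless_shooting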
Example #8
    def writeEventsToCsv(self, urls, processedUrlsFName, batchSize=20):
        numUrls = len(urls)
        origNumUrls = numUrls
        urlsWithEvents = 0
        totalEvents = 0
        processedListings = 0
        numTimeouts = 0

        try:
            with open(processedUrlsFName, 'r') as pus:
                pUrls = list(set(pus.read().split('\r\n')))
            logging.info(
                'Already processed {0} of {1} urls. Picking up where we'
                ' left off.'.format(len(pUrls), numUrls))
            urls = [url for url in urls if url not in pUrls]
            numUrls = len(urls)
        except IOError:
            pass

        with open(processedUrlsFName, 'a+') as pus:
            pUrls_writer = csv.writer(pus)
            with open(self.eventFile, 'a+') as f:
                writer = csv.writer(f)
                sttm = time.time()

                if self.eventMode == 'parallel':
                    batches = [
                        urls[x:x + batchSize]
                        for x in range(0, len(urls), batchSize)]
                    for b, batch in enumerate(batches):
                        logging.info('Starting batch {0} of  {1}'.format(
                            b + 1, len(batches)))
                        manager = Manager()
                        batchQueue = Queue()
                        batchTimeoutList = manager.list()
                        batchProcessedUrls = manager.list()
                        batchEventQueue = manager.Queue()
                        batchEventsSaved = manager.Value('i', 0)
                        jobs = []
                        for i, url in enumerate(batch):
                            batchQueue.put(
                                [self.eventMode, url, batchEventQueue,
                                 batchProcessedUrls, batchTimeoutList])
                        for i in range(len(batch)):
                            proc = Process(
                                target=self.eventWorker, args=(batchQueue,))
                            proc.start()
                            jobs.append(proc)
                        writeProc = Process(
                            target=self.writeToCsvWorker, args=(
                                batchEventQueue, batchEventsSaved))
                        time.sleep(2)
                        writeProc.start()
                        for j, job in enumerate(jobs):
                            # 5 seconds per url for each process before timeout
                            job.join(max(60, 5 * len(batch)))
                            if job.is_alive():
                                job.terminate()
                                logging.info(
                                    'Subprocess {0} of {1} timed out'.format(
                                        j + 1, min(24, len(batch))))
                        writeProc.join(max(60, 8 * len(batch)))
                        totalEvents += batchEventsSaved.value
                        processedListings += len(batch)
                        for url in set(list(batchProcessedUrls)):
                            pUrls_writer.writerow([url])
                        urlsWithEvents += len(set(list(batchProcessedUrls)))
                        numTimeouts += len(set(list(batchTimeoutList)))
                        durMins, minsLeft = self.timeElapsedLeft(
                            sttm, b + 1, len(batches))
                        logging.info(
                            'Saved {0} new events from {1} of {2} listings. '
                            '\nEstimated time to '
                            'completion: ~{3} min.'.format(
                                batchEventsSaved.value,
                                len(batchProcessedUrls), len(batch), minsLeft))
                        os.system(
                            "ps aux | grep chrome | awk ' { print $2 } ' |"
                            " xargs kill -9")

                elif self.eventMode == 'series':
                    for i, url in enumerate(urls):
                        numEvents = 0
                        events = self.getEventsFromListingUrl(
                            self.eventMode, url, None, urls, [])
                        if events is None:
                            durMins, minsLeft = self.timeElapsedLeft(
                                sttm, i + 1, numUrls)
                            logging.info(
                                'No sales events scraped from listing'
                                ' {0} of {1}. Check url: {2}. {3} min.'
                                'elapsed. {4} min. remaining.'.format(
                                    i + 1, numUrls, url, durMins,
                                    minsLeft))
                            continue
                        for event in events:
                            totalEvents += 1
                            numEvents += 1
                            writer.writerow(event)
                        urlsWithEvents += 1
                        pUrls_writer.writerow([url])
                        durMins, minsLeft = self.timeElapsedLeft(
                            sttm, i, numUrls)
                        if (i + 1) % 1 == 0:
                            logging.info(
                                'Scraped {0} sales events from listing {1}'
                                ' of {2}. Scraped {3} total sales events in'
                                ' {4} min. Estimated time to completion:'
                                ' ~{5} min.'.format(
                                    numEvents, i + 1, numUrls, totalEvents,
                                    durMins, minsLeft))
                else:
                    raise ValueError(
                        'Must specify valid event scraping '
                        'mode: ["parallel", "series"]')
        if numUrls > 0:
            self.pctUrlsWithEvents = round(
                urlsWithEvents / origNumUrls * 100.0, 1)
        else:
            self.pctUrlsWithEvents = -999

        logging.info('#' * 100)
        logging.info('#' * 100)
        logging.info(
            'Scraped events from {0} of {1} ({2}%) urls.'.format(
                urlsWithEvents, numUrls, self.pctUrlsWithEvents).center(
                90, ' ').center(100, '#').upper())
        logging.info(
            ('{0} of {1} urls timed out while scraping events.'.format(
                numTimeouts, numUrls).upper().center(90, ' ').center(
                100, '#')))
        logging.info(
            ('Saved {0} events to {1}'.format(
                totalEvents, self.eventFile).upper().center(
                90, ' ').center(100, '#')))
        logging.info('#' * 100)
        logging.info('#' * 100)
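
batchEventsSaved above is a Manager Value('i', 0): an integer proxy whose .value the single writer process increments and the parent reads after join(). Below is a minimal sketch of that shared-counter handoff with an illustrative name (count_items); the in-place increment is safe here because only one process writes.

from multiprocess import Manager, Process


def count_items(items, saved):
    for _ in items:
        saved.value += 1  # fine with a single writer; not atomic across writers


if __name__ == '__main__':
    manager = Manager()
    saved = manager.Value('i', 0)
    worker = Process(target=count_items, args=(range(10), saved))
    worker.start()
    worker.join()
    print(saved.value)  # expected: 10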
Example #9
def osint(self, organization, domain, files, ext, scope_file, aws, aws_fixes,
          html, screenshots, graph, nuke, whoxy_limit, typo, unsafe):
    """
The OSINT toolkit:

This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the wiki for the full details, reporting information, and lists of API keys.

Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    click.clear()
    click.secho(asciis.print_art(), fg="magenta")
    click.secho("\tRelease v{}, {}".format(VERSION, CODENAME), fg="magenta")
    click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.",
                fg="green")
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        rev_domain_list = manager.list()
        # Create reporter object and generate lists of everything, just IP addresses, and just domains
        browser = helpers.setup_headless_chrome(unsafe)
        report = reporter.Reporter(organization, report_path, output_report,
                                   browser)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        # Phase 1 jobs
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list, rev_domain_list,
                                        organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Hunter",
                                target=report.create_domain_report_table,
                                args=(organization, scope, ip_list,
                                      domain_list, rev_domain_list,
                                      whoxy_limit))
        jobs.append(domain_report)
        # Phase 2 jobs
        shodan_report = Process(name="Shodan Hunter",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        if typo:
            lookalike_report = Process(name="Lookalike Domain Reviewer",
                                       target=report.create_lookalike_table,
                                       args=(organization, domain))
            more_jobs.append(lookalike_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, browser))
            more_jobs.append(take_screenshots)
        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_metadata_table,
                                   args=(domain, ext, report_path))
            more_jobs.append(files_report)
        # Phase 3 jobs
        cloud_report = Process(name="Cloud Hunter",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)
        # Process the lists of jobs in phases, starting with phase 1
        click.secho(
            "[+] Beginning initial discovery phase! This could take some time...",
            fg="green")
        for job in jobs:
            click.secho("[+] Starting new process: {}".format(job.name),
                        fg="green")
            job.start()
        for job in jobs:
            job.join()
        # Wait for phase 1 and then begin phase 2 jobs
        click.secho(
            "[+] Initial discovery is complete! Proceeding with additional queries...",
            fg="green")
        for job in more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),
                        fg="green")
            job.start()
        for job in more_jobs:
            job.join()
        # Wait for phase 2 and then begin phase 3 jobs
        click.secho("[+] Final phase: checking the cloud and web services...",
                    fg="green")
        for job in even_more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),
                        fg="green")
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done, so close out the SQLite3 database connection
        report.close_out_reporting()
        click.secho(
            "[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report),
            fg="green")
        # Perform additional tasks depending on the user's command line options
        if graph:
            graph_reporter = grapher.Grapher(output_report)
            click.secho(
                "[+] Loading ODIN database file {} for conversion to Neo4j".
                format(output_report),
                fg="green")
            if nuke:
                if click.confirm(click.style(
                        "[!] You set the --nuke option. This wipes out all nodes for a \
fresh start. Proceed?",
                        fg="red"),
                                 default=True):
                    try:
                        graph_reporter.clear_neo4j_database()
                        click.secho("[+] Database successfully wiped!\n",
                                    fg="green")
                    except Exception as error:
                        click.secho(
                            "[!] Failed to clear the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",
                            fg="red")
                        click.secho("L.. Details: {}".format(error), fg="red")
                else:
                    click.secho(
                        "[!] You can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options.",
                        fg="red")
                try:
                    graph_reporter.convert()
                except Exception as error:
                    click.secho(
                        "[!] Failed to convert the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",
                        fg="red")
                    click.secho("L.. Details: {}".format(error), fg="red")
        if html:
            click.secho("\n[+] Creating the HTML report using {}.".format(
                output_report),
                        fg="green")
            try:
                html_reporter = htmlreporter.HTMLReporter(
                    organization, report_path + "/html_report/", output_report)
                html_reporter.generate_full_report()
            except Exception as error:
                click.secho("[!] Failed to create the HTML report!", fg="red")
                click.secho("L.. Details: {}".format(error), fg="red")