Example #1
class MultiThreading(object):
    def __init__(self, funct, data, threads='all'):
        raise Exception("Not functionnal yet !")
        self.funct = funct
        if threads == 'all':
            threads = cpu_count()
        self.pool = Pool(processes=threads)
        self.data = data
        self.PG = None
        self.initializer = None
        self.finalizer = None

    def add_progress_counter(self, init_mess="Beginning", end_mess="Done",
                             name_things='things', perc_interv=5):
        self.PG = ProgressCounter(init_mess=init_mess, end_mess=end_mess,
                                  nmb_max=len(self.data),
                                  name_things=name_things,
                                  perc_interv=perc_interv)
        self.manager = Manager()
        self.manager.register("PG", self.PG)

    def run(self):
        res = self.pool.map_async(self.PG_func_wrapper, self.data)
        self.pool.close()
        self.pool.join()
        return res
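
The run method above returns the AsyncResult produced by map_async rather than the mapped values, so a caller still needs .get(). A minimal, hypothetical sketch of that Pool lifecycle (square and the input range are placeholders; the progress counter is omitted):

from multiprocessing import Pool, cpu_count

def square(x):
    return x * x

if __name__ == '__main__':
    pool = Pool(processes=cpu_count())
    async_result = pool.map_async(square, range(10))
    pool.close()               # no further tasks will be submitted
    pool.join()                # wait for the workers to finish
    print(async_result.get())  # the actual list of results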
Example #2
def spin_crawl_threads(state, classifiers, MAX_BIT_SIZE, MAX_DL_THREADS, image_path):
    print("Running threads...")
    manager = Manager()

    location_q = manager.Queue(maxsize=16)
    image_q = manager.Queue(maxsize=64)
    state_lock = manager.Lock()

    generate_location = Process(target=generate_location_thread,
                                args=(location_q, MAX_BIT_SIZE),
                                name="generate_location")
    classification = Process(target=classification_thread,
                             args=(image_q, classifiers, image_path,
                                   state, state_lock), name="classification")
    download_image_t = Process(target=download_image_thread,
                               args=(location_q, image_q, MAX_DL_THREADS),
                               name="download_image")

    download_image_t.start()
    classification.start()
    generate_location.start()

    def kill_threads():
        for thread in active_children():
            thread.terminate()

    atexit.register(kill_threads)

    download_image_t.join()
    classification.join()
    generate_location.join()
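
Example #2 wires three processes together with bounded manager queues. A stripped-down, hypothetical producer/consumer version of the same pattern (the sentinel-based shutdown is an assumption, not something the original code does):

from multiprocessing import Manager, Process

def producer(q):
    for i in range(5):
        q.put(i)
    q.put(None)                     # sentinel tells the consumer to stop

def consumer(q):
    while True:
        item = q.get()
        if item is None:
            break
        print('got', item)

if __name__ == '__main__':
    manager = Manager()
    q = manager.Queue(maxsize=16)   # bounded, as in the example above
    p1 = Process(target=producer, args=(q,), name='producer')
    p2 = Process(target=consumer, args=(q,), name='consumer')
    p1.start()
    p2.start()
    p1.join()
    p2.join()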
Example #3
def main():
    """
    main
    """
    file_to_attack = './data/example_files/S_hecht_submission_3.csv'
    method_order = 'param'
    nb_element = 1
    month_spliter(file_to_attack)
    manager = Manager()
    queue_list = [manager.Queue(1) for _ in range(13)]
    with Pool(6) as p:
        print("HEY")
        p.map(maker, [i for i in range(13)], queue_list)
    for queue in queue_list:
        GUESS_PART.append(queue.get())
    GUESS_PART.sort()
    for i in range(NB_MONTH):
        char = "guess_par_t" + str(i) + ".json"
        with open(char, "w") as jsdump:
            json.dump(GUESS_PART[i][1], jsdump, indent=4)
    write_csv(GUESS_PART[0][1], GUESS_PART[1][1], GUESS_PART[2][1],
              GUESS_PART[3][1], GUESS_PART[4][1], GUESS_PART[5][1],
              GUESS_PART[6][1], GUESS_PART[7][1], GUESS_PART[8][1],
              GUESS_PART[9][1], GUESS_PART[10][1], GUESS_PART[11][1],
              GUESS_PART[12][1])
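
Note that Pool.map takes a single iterable; its third positional argument is chunksize, so queue_list in the call above never actually reaches maker. A hedged sketch of how the queues could be passed explicitly with starmap (the body of maker is an assumption):

from multiprocessing import Manager, Pool

def maker(index, queue):
    # hypothetical worker: push an (index, payload) pair into its own queue
    queue.put((index, {'month': index}))

if __name__ == '__main__':
    manager = Manager()
    queue_list = [manager.Queue(1) for _ in range(13)]
    with Pool(6) as pool:
        pool.starmap(maker, zip(range(13), queue_list))
    results = sorted(queue.get() for queue in queue_list)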
Example #5
    def run_parallel(self,
                     test_suites,
                     test_runner,
                     result_type=None,
                     results_path=None):

        exit_code = 0
        proc = None
        unittest.installHandler()
        processes = []
        manager = Manager()
        results = manager.dict()
        manager.dict()
        start = time.time()

        test_mapping = {}
        for test_suite in test_suites:
            # Give each test suite an uuid so it can be
            # matched to the correct test result
            test_id = str(uuid.uuid4())
            test_mapping[test_id] = test_suite

            proc = Process(target=self.execute_test,
                           args=(test_runner, test_id, test_suite, results))
            processes.append(proc)
            proc.start()

        for proc in processes:
            proc.join()

        finish = time.time()

        errors, failures, _ = self.dump_results(start, finish, results)

        if result_type is not None:
            all_results = []
            for test_id, result in list(results.items()):
                tests = test_mapping[test_id]
                result_parser = SummarizeResults(vars(result), tests,
                                                 (finish - start))
                all_results += result_parser.gather_results()

            reporter = Reporter(result_parser=result_parser,
                                all_results=all_results)
            reporter.generate_report(result_type=result_type,
                                     path=results_path)

        if failures or errors:
            exit_code = 1

        return exit_code
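
The essential pattern here is one Process per test suite writing its outcome into a shared manager dict keyed by a uuid. A minimal sketch of just that mechanism, with a placeholder worker:

import uuid
from multiprocessing import Manager, Process

def execute(job_id, results):
    results[job_id] = 'passed'        # each worker records its own outcome

if __name__ == '__main__':
    manager = Manager()
    results = manager.dict()
    procs = []
    for _ in range(4):
        job_id = str(uuid.uuid4())
        proc = Process(target=execute, args=(job_id, results))
        procs.append(proc)
        proc.start()
    for proc in procs:
        proc.join()
    print(dict(results))              # snapshot the proxy as a plain dict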
Example #6
    def run_parallel(
            self, test_suites, test_runner, result_type=None,
            results_path=None):

        exit_code = 0
        proc = None
        unittest.installHandler()
        processes = []
        manager = Manager()
        results = manager.dict()
        manager.dict()
        start = time.time()

        test_mapping = {}
        for test_suite in test_suites:
            # Give each test suite an uuid so it can be
            # matched to the correct test result
            test_id = str(uuid.uuid4())
            test_mapping[test_id] = test_suite

            proc = Process(
                target=self.execute_test,
                args=(test_runner, test_id, test_suite, results))
            processes.append(proc)
            proc.start()

        for proc in processes:
            proc.join()

        finish = time.time()

        errors, failures, _ = self.dump_results(start, finish, results)

        if result_type is not None:
            all_results = []
            for test_id, result in list(results.items()):
                tests = test_mapping[test_id]
                result_parser = SummarizeResults(
                    vars(result), tests, (finish - start))
                all_results += result_parser.gather_results()

            reporter = Reporter(
                result_parser=result_parser, all_results=all_results)
            reporter.generate_report(
                result_type=result_type, path=results_path)

        if failures or errors:
            exit_code = 1

        return exit_code
Example #7
def query(query_lst):

    manager = Manager()
    hits = manager.dict()

    results = []

    for q in query_lst:
        r = requests.get('http://dblp.uni-trier.de/search/publ/api',
                         params={
                             'q': q,
                             'h': 100,
                             'format': 'json'
                         })

        if r.status_code == 429:
            raise Error

        json_answer = r.json()

        res = json_answer["result"]["hits"].get("hit", None)

        if res is None:
            continue

        results += res

    def f(d, hit, n):

        if hit is None:
            return

        authors = hit["info"].pop("authors")
        if isinstance(authors["author"], dict):
            hit["info"]["authors"] = authors["author"]["text"]
        else:
            hit["info"]["authors"] = [
                fullname(a["text"]) for a in authors["author"]
            ]

        hit["info"]["bibtex"] = get_bib(hit["info"]["key"])
        d[n] = hit["info"]

    job = [
        Process(target=f, args=(hits, hit, n)) for n, hit in enumerate(results)
    ]
    _ = [p.start() for p in job]
    _ = [p.join() for p in job]

    return dict(hits)
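
Starting one Process per hit works where processes are forked, since f is a closure and could not be pickled into Pool tasks. A hedged sketch of the same enumerate-into-a-manager-dict idea with a bounded Pool instead (record and the sample data are stand-ins, not the f above):

from multiprocessing import Manager, Pool

def record(shared, index, value):
    # module-level so it can be pickled into the pool workers
    shared[index] = value.upper()

if __name__ == '__main__':
    manager = Manager()
    shared = manager.dict()
    items = ['alpha', 'beta', 'gamma']
    with Pool(3) as pool:
        pool.starmap(record, [(shared, n, item) for n, item in enumerate(items)])
    print(dict(shared))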
Example #8
    def get_city_states(self):
        """
        Creates city states from start time to end time
        :param:
        :return:
        """
        city_states = []
        start_time = self.start_time
        end_time = self.end_time

        # Create array of time slice values between the start and end time
        business_days = self.config['city_state_creator']['business_days']
        business_hours_start = self.config['city_state_creator'][
            'business_hours_start']
        business_hours_end = self.config['city_state_creator'][
            'business_hours_end']
        index = pd.date_range(start=start_time,
                              end=end_time,
                              freq=str(self.time_unit_duration) + 'min')

        # Filter only the required days and hours
        index = index[index.day_name().isin(business_days)]
        index = index[(index.hour >= business_hours_start)
                      & (index.hour <= business_hours_end)]
        time_slice_starts = index - timedelta(
            minutes=self.time_slice_duration / 2)
        time_slice_ends = index + timedelta(minutes=self.time_slice_duration /
                                            2)

        # Create arguments dictionary for parallelization
        self.parallel_args = self.create_parallel_args(index,
                                                       time_slice_starts,
                                                       time_slice_ends)

        # Create city states
        manager = Manager()
        city_states = manager.dict()
        N = len(index.values)

        # Create parallel pool
        self.logger.info("Creating parallelization pool")
        pool = ProcessPool(nodes=25)
        pool.map(self.get_city_state, ([city_states, t] for t in xrange(N)))
        pool.close()
        pool.join()
        pool.clear()
        self.logger.info("Finished creating city states")

        return dict(city_states)
Example #9
    def fit(self, X, Y):
        assert not self.fit_done
        assert len(X) == len(Y)

        possible_labels = list(set(y_val for y in Y for y_val in y))
        job_labels = np.array_split(possible_labels, self.n_jobs)

        with Manager() as manager:
            X_proxy = manager.list(X)
            Y_proxy = manager.list(Y)
            output_queue = Queue()
            processes = [
                Process(target=sequential_execute,
                        args=(output_queue, get_binary_clf_from_multilabel, [{
                            'X':
                            X_proxy,
                            'Y':
                            Y_proxy,
                            'label':
                            lbl,
                            'return_label':
                            True
                        } for lbl in job])) for job in job_labels
            ]
            [p.start() for p in processes]
            results = [output_queue.get()
                       for lbl in possible_labels]  # needs to be flattened
            [p.join() for p in processes]

        self.classifiers = dict(results)
        self.fit_done = True
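
Using Manager as a context manager, as this fit method does, shuts the manager down when the block exits, so the proxies must be consumed before leaving the block. A small, hypothetical sketch of that scoping (worker and the data are placeholders):

from multiprocessing import Manager, Process, Queue

def worker(shared_list, out_q):
    out_q.put(sum(shared_list))       # read through the proxy, report a result

if __name__ == '__main__':
    out_q = Queue()
    with Manager() as manager:        # proxies below are only valid inside this block
        data = manager.list(range(100))
        p = Process(target=worker, args=(data, out_q))
        p.start()
        result = out_q.get()          # drain before the manager shuts down
        p.join()
    print(result)                     # 4950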
Example #10
def ospf_check():
    clear_log()
    devices = [x.split(',')[0] for x in open(devicesFile)]
    pool = Pool(processor)
    lock = Manager().Lock()
    list(pool.map(partial(_inf_ospf_check, lock), devices))
    pool.close()
    pool.join()
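
A pattern that recurs throughout these network-check examples: a Manager().Lock() is bound into the worker with functools.partial because a plain multiprocessing.Lock cannot be pickled into Pool.map arguments, while a manager lock proxy can. A minimal, hypothetical version (check stands in for _inf_ospf_check):

from functools import partial
from multiprocessing import Manager, Pool

def check(lock, device):
    with lock:                        # serialize writes to a shared log/file
        print('checked', device)

if __name__ == '__main__':
    devices = ['sw1', 'sw2', 'sw3']
    lock = Manager().Lock()           # the proxy is picklable, unlike a raw Lock
    with Pool(2) as pool:
        pool.map(partial(check, lock), devices)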
Example #11
def run_post_process():
    es = ES(FLAGS.configfile_name)
    manager=Manager()
    lock=manager.Lock()
    shared_dict=manager.dict({'time':0,"id":""})
    process_num=int(cpu_count()-2)

    generator_list=[]
    for i in range(process_num):
        generator_list.append(_generator(lock,shared_dict,es))

    #%%
    p=[]
    for i in range(process_num):
        p.append(Process(target=_process_unknown_record,args=(generator_list[i],)))
        p[i].start()

    for q in p:
        q.join()
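
Here the manager lock guards updates to the shared dict; an increment on a dict proxy is a read-modify-write and is not atomic, so updates can be lost without the lock. A small illustrative sketch (bump and the counts are hypothetical):

from multiprocessing import Manager, Process

def bump(lock, shared):
    for _ in range(1000):
        with lock:                        # guard the read-modify-write on the proxy
            shared['count'] = shared['count'] + 1

if __name__ == '__main__':
    manager = Manager()
    lock = manager.Lock()
    shared = manager.dict({'count': 0})
    procs = [Process(target=bump, args=(lock, shared)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(shared['count'])                # 4000; without the lock, updates may be lost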
Example #12
    def create_csv(self):
        if __name__ == '__main__':
            t1 = time()
            file1 = open(self.out_csv1, "w")
            file1.write("id" + ',' + "level" + '\n')
            file2 = open(self.out_csv2, "w")
            file2.write("id" + ',' + "object_name" + '\n')
            file1.close()
            file2.close()

            i = range(len(self.list_of_zips))
            p = Pool()
            m = Manager()
            l = m.Lock()
            func = partial(self.parse_Zip, l)
            p.map(func, i)
            p.close()
            p.join()
        print('Create .csv files time = ' + str(time() - t1) + 's')
Example #13
def download_image_thread(location_q, image_q, MAX_DL_THREADS=10):
    print("Running Download Image Thread.")

    max_processes = MAX_DL_THREADS
    print("Creating a thread pool of size {} for downloading images...".format(max_processes))
    pool = Pool(processes=max_processes)
    # Allow us to have n processes running, and n processes scheduled to run.
    # TODO: Manager is not strictly necessary here, but is used to get around
    # the fact that thread-safe objects cannot be passed by reference; they
    # must be inherited. A more lightweight solution should be found.
    workers = Manager().Semaphore(max_processes*2)

    def async_download(location):
        image = download_image(location)
        image_q.put((location, image), True)
        workers.release()

    while True:
        location = location_q.get(True)
        workers.acquire()
        pool.apply_async(async_download, (location,))
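
The TODO above uses a manager semaphore purely to throttle how many downloads are in flight. A hedged alternative sketch that releases the semaphore from apply_async's callback in the parent process instead of inside the worker (download and the throttle size are assumptions):

from multiprocessing import Manager, Pool

def download(location):
    return location, 'image-bytes'        # hypothetical stand-in for download_image

def throttled_map(locations, max_in_flight=4):
    workers = Manager().Semaphore(max_in_flight)
    results = []
    with Pool(processes=max_in_flight) as pool:
        for location in locations:
            workers.acquire()             # block while too many tasks are pending
            results.append(pool.apply_async(
                download, (location,),
                callback=lambda _res: workers.release()))
        return [r.get() for r in results]

if __name__ == '__main__':
    print(throttled_map(['a', 'b', 'c']))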
Example #14
def folderbase_cut_silence(input_folder, cut_interval):

    output_no_silence = os.path.join(input_folder, "remove_silence")
    # if not os.path.exists(output_folder):
    # 	os.mkdir(output_folder)
    if not os.path.exists(output_no_silence):
        os.mkdir(output_no_silence)
    wav_files = []
    for root, dirs, files in os.walk(input_folder):
        for filename in files:
            wav_files.append(filename)

    def process_files(lock, file):
        try:
            #exclude log.txt file
            if re.search(".+\.wav", file):
                wave_file = os.path.join(input_folder, file)
                wo_num = cut_wav_without_silence(wave_file, output_no_silence,
                                                 cut_interval)
                with cut_silence_file_num.get_lock():
                    cut_silence_file_num.value += 1
                with cut_silence_out_file_num.get_lock():
                    cut_silence_out_file_num.value += wo_num
                os.remove(wave_file)

        except Exception as e:
            logging.info(e)
            with cut_silence_fail_file.get_lock():
                cut_silence_fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    pool.map(process_files, locks, wav_files)
    loginfo = '''Total number of audio files processed is {}, generated {} files and {} files failed
		'''.format(cut_silence_file_num.value, cut_silence_out_file_num.value,
             cut_silence_fail_file.value)
    logging.info(loginfo)
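
As in Example #3, pool.map(process_files, locks, wav_files) hands wav_files to the chunksize parameter, and a function defined inside another function cannot be pickled for pool workers anyway. A hedged sketch of a working shape for this kind of per-file job (process_file is a placeholder):

from multiprocessing import Manager, Pool

def process_file(lock, wav_file):
    with lock:                            # guard shared counters or log output
        print('processing', wav_file)

if __name__ == '__main__':
    wav_files = ['a.wav', 'b.wav']
    lock = Manager().Lock()
    with Pool(2) as pool:
        pool.starmap(process_file, [(lock, f) for f in wav_files])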
Example #15
    def __init__(self,
                 initial_patterns_list,
                 input_text_file,
                 data_source_type,
                 read_file_format='rb'):
        self.read_format = read_file_format
        self.pattern_to_data = {}  #defaultdict(PatternData)

        patterns_data = [
            PatternData(pattern) for pattern in initial_patterns_list
        ]
        self.pattern_to_data = Manager().dict(
            {pattern.formatted: pattern
             for pattern in patterns_data})

        patterns_len = [
            len(pattern.split()) for pattern in self.pattern_to_data.keys()
        ]
        self.min_pattern_len = min(patterns_len)
        self.max_pattern_len = max(patterns_len)
        self.data_wrapper = data_wrapper_factory(input_text_file,
                                                 data_source_type)
Example #16
def zte_gpon_svlan_check():
    clear_log()
    nodes = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)"
    )
    olts = ((x[0], x[1]) for x in nodes)
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])
    pool = Pool(8)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, lzte_gpon_svlan), olts))
    pool.close()
    pool.join()
Example #17
def svlan_check():
    clear_log()
    #  nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='company', property_value='zte')
    olts = [(x['ip'], x['company'], x['area']) for x in nodes]
    #  list(map(compose(card_entry, get_card), olts))
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, get_svlan), olts))
    pool.close()
    pool.join()
Example #18
def interface_check_m():
    clear_log()
    #  cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    cmd = "match(s: Switch)  return s.ip, s.model"
    #  cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    out_inf = partial(output_interface_m, lock)
    list(pool.map(compose(out_inf, get_interface), switchs))
    pool.close()
    pool.join()
Example #20
def folderbase_convert_to_wave(webmfolder, wavefolder):
    def process_convert(lock, filename):
        my_logger.debug("filename is {}".format(filename))
        with total_num.get_lock():
            total_num.value += 1
        try:
            success = convert_to_wav(filename, wavefolder)
            with success_num.get_lock():
                success_num.value += success
            os.remove(filename)
        except Exception as e:
            line = "\t".join([str(datetime.datetime.now()), filename, str(e)])
            my_logger.info(line)
            fail_folder = "data/convert_failed"
            if not os.path.exists(fail_folder):
                os.mkdir(fail_folder)
            filebase = os.path.basename(filename)
            failed_file = os.path.join(fail_folder, filebase)
            os.rename(filename, failed_file)
            with fail_num.get_lock():
                fail_num.value += 1
        return 1

    filenames = []
    for file in mp3gen(webmfolder):
        if re.search("wav", file): continue
        filenames.append(file)
    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(filenames)
    pool.map(process_convert, locks, filenames)

    my_logger.info(
        "{}/{} files successfully converted to wave and {} files failed".
        format(success_num.value, total_num.value, fail_num.value))
Example #21
def hostname_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()
    ip_hostname = (x.split(',') for x in open(result_file))
    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    list(
        map(lambda x: graph.cypher.execute(cmd, ip=x[0], hostname=x[1]),
            ip_hostname))
Example #22
def zhongji_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()
    ports = (x.split(',') for x in open(result_file))
    cmd = """match(n: Olt) where n.ip = {ip} 
    merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) 
    merge(m) - [:Include]->(p: Port{name: {interface}})"""
    list(
        map(
            lambda x: graph.cypher.execute(
                cmd, ip=x[0], sm=x[1], interface=x[2]), ports))
Example #23
def add_power_info():
    funcs = {
        'S8508': S85.get_power_info,
        'S8505': S85.get_power_info,
        'T64G': T64.get_power_info,
        'S8905': S89.get_power_info,
        'S8905E': S8905E.get_power_info,
        'S9306': S93.get_power_info,
        'S9303': S93.get_power_info
    }
    get_power_info = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model"
    )
    switches = [dict(ip=x['ip'], model=x['model']) for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_power_info, lock)
    list(pool.map(compose(_ff, get_power_info), switches))
    pool.close()
    pool.join()
Example #24
    def sample(self, n_samples: int, beta: float = 1.):
        with Manager() as mgr:
            queues_work = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]
            queues_return = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]

            _ = self.parallel_pool.starmap(
                func=worker_init,
                iterable=[(queues_work[idx], queues_return[idx], idx, self.samplers[idx])
                          for idx in range(self.num_chains)])

            worker_results = [self.parallel_pool.apply_async(func=worker_run) for _ in range(self.num_chains)]

            swapped = [None for _ in self.samplers]
            last_samples = [None for _ in self.samplers]
            for i_sample in tqdm(range(int(n_samples))):
                logger.debug('MAIN PROCESS: deploying work...')
                for idx, beta in enumerate(self.betas):
                    queues_work[idx].put((idx, copy.deepcopy(swapped[idx]), beta, False))  # sample
                logger.debug('MAIN PROCESS: waiting for return...')
                for idx in range(len(self.samplers)):
                    idx, last_sample, beta = queues_return[idx].get()  # get sample
                    last_samples[idx] = last_sample
                logger.debug('MAIN PROCESS: swapping samples...')
                swapped = self.swap_samples(last_samples)  # swap samples
                logger.debug('MAIN PROCESS: swapping samples...')
                self.adjust_betas(i_sample, swapped, last_samples)  # adjust temps
            # logger.debug('stopping workers...')
            _ = [queues_work[idx].put((idx, None, 0.00, True)) for idx in range(self.num_chains)]
            _ = [queues_work[idx].join() for idx in range(self.num_chains)]
            # logger.debug('reached getting from finalqueue')
            for worker_result in worker_results:
                idx, sampler_obj = worker_result.get()
                logger.debug(f'GATHERED sampler {idx} trace_x: {len(sampler_obj.trace_x)}')
                self.samplers[idx] = sampler_obj

            self.parallel_pool.close()
            self.parallel_pool.join()
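
Each chain above gets a work queue and a return queue so the parent can run the samplers in lock step. A reduced, hypothetical request/response sketch with a single persistent worker:

from multiprocessing import Manager, Process

def worker(work_q, result_q):
    while True:
        job = work_q.get()
        if job is None:                   # shutdown sentinel
            break
        result_q.put(job * job)

if __name__ == '__main__':
    with Manager() as mgr:
        work_q = mgr.Queue(maxsize=1)
        result_q = mgr.Queue(maxsize=1)
        p = Process(target=worker, args=(work_q, result_q))
        p.start()
        for i in range(3):                # lock-step round trips, one job at a time
            work_q.put(i)
            print(result_q.get())
        work_q.put(None)
        p.join()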
Example #25
def add_traffics():
    funcs = {
        'S8508': S85.get_traffics,
        'S8505': S85.get_traffics,
        'T64G': T64.get_traffics,
        'S8905': S89.get_traffics,
        'S8905E': S8905E.get_traffics,
        'S9306': S93.get_traffics,
        'S9303': S93.get_traffics
    }
    get_traffics = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model"
    )
    switchs = [
        dict(ip=x['ip'], infs=x['infs'], model=x['model']) for x in nodes
    ]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_traffics, lock)
    list(pool.map(compose(_ff, get_traffics), switchs))
    pool.close()
    pool.join()
Example #26
def folderbase_cut_interval(input_folder, output_folder, cut_period):
    wav_files = []
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    for root, dirs, files in os.walk(input_folder):
        for filename in files:
            wav_files.append(os.path.join(root, filename))


#	for file in wav_files:

    def process_files(lock, file):
        try:
            if re.search(".+\.wav", file):
                with file_num.get_lock():
                    file_num.value += 1
                filebasename = os.path.basename(file)
                filebasename, _ = os.path.splitext(filebasename)
                #get audio properties
                audio_prop = {}
                with wave.open(file, mode='rb') as newAudio:
                    audio_prop["nchannels"] = newAudio.getnchannels()
                    audio_prop["nframes"] = newAudio.getnframes()
                    audio_prop["sampwidth"] = newAudio.getsampwidth()
                    audio_prop["framerate"] = newAudio.getframerate()
                    audio_prop["comptype"] = newAudio.getcomptype()
                    audio_prop["compname"] = newAudio.getcompname()
                audio_duration = audio_prop["nframes"] / audio_prop["framerate"]

                precut_duration = cut_period
                cut_start = 0
                cut_return = 0
                cut_num = 0
                index = 0
                while cut_start < audio_duration:
                    cut_end = cut_start + precut_duration
                    cut_audio, cutaudio_prop = cut_wave(file,
                                                        cut_start,
                                                        cut_end,
                                                        start_bias=0,
                                                        end_bias=0)
                    newfile = os.path.join(
                        output_folder,
                        filebasename + "_" + str(index) + ".wav")
                    index += 1
                    with wave.open(newfile, "wb") as newAudio:
                        newAudio.setparams((cutaudio_prop["nchannels"],
                                            cutaudio_prop["sampwidth"],
                                            cutaudio_prop["framerate"],
                                            cutaudio_prop["nframes"],
                                            cutaudio_prop["comptype"],
                                            cutaudio_prop["compname"]))
                        newAudio.writeframes(cut_audio)
                    cut_start = cut_start + precut_duration
                    with out_file_num.get_lock():
                        out_file_num.value += 1
                os.remove(file)
        except Exception as e:
            logging.info(e)
            with fail_file.get_lock():
                fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    pool.map(process_files, locks, wav_files)
    loginfo = '''Total number of audio files processed is {}, generated {} files and {} files failed
	'''.format(file_num.value, out_file_num.value, fail_file.value)
    logging.info(loginfo)
Example #27
# Settings: Location, Units, and rapidfire (optional)
latitude = check_env_var("HZN_LAT", printerr=True)
longitude = check_env_var("HZN_LON", printerr=True)
pws_units = check_env_var("PWS_UNITS", default='us', printerr=True)    # weewx recommends only using 'us'
pws_wu_loc = check_env_var("PWS_WU_LOC", default='', printerr=True)
pws_wu_rapidfire = check_env_var("PWS_WU_RPDF", default='False', printerr=True)

# Deal with a potential lower-case (boolean value from Horizon) or erroneous value
if pws_wu_rapidfire == "true" or pws_wu_rapidfire == "True": 
    pws_wu_rapidfire = "True"
else: 
    pws_wu_rapidfire = "False"


## Shared data structure (dict for flask server to read & serve)
manager = Manager()
sdata = manager.dict()
standard_params = ["wu_id", "stationtype", "model", "latitude", "longitude", "units", "location"]
standard_values = [pws_wu_id, pws_station_type, pws_model, latitude, longitude, pws_units, pws_wu_loc]
sdata["r"] = dict(zip(["status"], ["Station initializing..."]))
sdata["t"] = str(int(time.time()))                                      # Timestamp
sdata["i"] = dict(zip(standard_params, standard_values))                # Station Info

## Flask HTTPserver ----------------------------------------------------------
## Start simple flask server at localhost:port and pass in shared data dict
p_flask = Process(target=fl.run_server, args=('0.0.0.0', 8357, sdata))
p_flask.start()

## Weewx service -------------------------------------------------------------
# Modify the weewx configuration file with our env var settings
weemod = weewx_mod(weewx_config_file, pws_station_type)
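
The manager dict here is the hand-off point between the main loop and the Flask process: updates made in the parent become visible to the child through the proxy, as long as top-level keys are reassigned rather than mutated in place. A tiny, hypothetical illustration without Flask:

import time
from multiprocessing import Manager, Process

def serve(shared):
    for _ in range(3):                    # stand-in for the flask server reading sdata
        print('status:', shared['r'])
        time.sleep(1)

if __name__ == '__main__':
    manager = Manager()
    sdata = manager.dict()
    sdata['r'] = dict(status='Station initializing...')
    reader = Process(target=serve, args=(sdata,))
    reader.start()
    sdata['r'] = dict(status='ok')        # reassign the key so the proxy sees the change
    reader.join()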
Example #28
def osint(self,organization,domain,files,ext,scope_file,aws,aws_fixes,html,
          screenshots,graph,nuke,whoxy_limit,typo,unsafe):
    """
The OSINT toolkit:

This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the wiki for the full details, reporting information, and lists of API keys.

Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    click.clear()
    click.secho(asciis.print_art(),fg="magenta")
    click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
    click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.",fg="green")
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        rev_domain_list = manager.list()
        # Create reporter object and generate lists of everything, just IP addresses, and just domains
        browser = helpers.setup_headless_chrome(unsafe)
        report = reporter.Reporter(organization,report_path,output_report,browser)
        report.create_tables()
        scope,ip_list,domain_list = report.prepare_scope(ip_list,domain_list,scope_file,domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        # Phase 1 jobs
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain,))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list,rev_domain_list,organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Hunter",
                                target=report.create_domain_report_table,
                                args=(organization,scope,ip_list,domain_list,rev_domain_list,whoxy_limit))
        jobs.append(domain_report)
        # Phase 2 jobs
        shodan_report = Process(name="Shodan Hunter",
                                target=report.create_shodan_table,
                                args=(ip_list,domain_list))
        more_jobs.append(shodan_report)
        if typo:
            lookalike_report = Process(name="Lookalike Domain Reviewer",
                                      target=report.create_lookalike_table,
                                      args=(organization,domain))
            more_jobs.append(lookalike_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path,browser))
            more_jobs.append(take_screenshots)
        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_metadata_table,
                                   args=(domain,ext,report_path))
            more_jobs.append(files_report)
        # Phase 3 jobs
        cloud_report = Process(name="Cloud Hunter",
                               target=report.create_cloud_table,
                               args=(organization,domain,aws,aws_fixes))
        even_more_jobs.append(cloud_report)
        # Process the lists of jobs in phases, starting with phase 1
        click.secho("[+] Beginning initial discovery phase! This could take some time...",fg="green")
        for job in jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in jobs:
            job.join()
        # Wait for phase 1 and then begin phase 2 jobs
        click.secho("[+] Initial discovery is complete! Proceeding with additional queries...",fg="green")
        for job in more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in more_jobs:
            job.join()
        # Wait for phase 2 and then begin phase 3 jobs
        click.secho("[+] Final phase: checking the cloud and web services...",fg="green")
        for job in even_more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done, so close out the SQLIte3 database connection
        report.close_out_reporting()
        click.secho("[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report),fg="green")
        # Perform additional tasks depending on the user's command line options
        if graph:
            graph_reporter = grapher.Grapher(output_report)
            click.secho("[+] Loading ODIN database file {} for conversion to Neo4j".format(output_report),fg="green")
            if nuke:
                if click.confirm(click.style("[!] You set the --nuke option. This wipes out all nodes for a \
fresh start. Proceed?",fg="red"),default=True):
                    try:
                        graph_reporter.clear_neo4j_database()
                        click.secho("[+] Database successfully wiped!\n",fg="green")
                    except Exception as error:
                        click.secho("[!] Failed to clear the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                        click.secho("L.. Details: {}".format(error),fg="red")
                else:
                    click.secho("[!] You can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options.",fg="red")
                try:
                    graph_reporter.convert()
                except Exception as error:
                    click.secho("[!] Failed to convert the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                    click.secho("L.. Details: {}".format(error),fg="red")
        if html:
            click.secho("\n[+] Creating the HTML report using {}.".format(output_report),fg="green")
            try:
                html_reporter = htmlreporter.HTMLReporter(organization,report_path + "/html_report/",output_report)
                html_reporter.generate_full_report()
            except Exception as error:
                click.secho("[!] Failed to create the HTML report!",fg="red")
                click.secho("L.. Details: {}".format(error),fg="red")
Example #29
from multiprocess import Manager, Process


def fun(d, l):
    d[1] = '1'
    d[2] = 2
    d[0.25] = None
    l.reverse()


if __name__ == '__main__':
    manager = Manager()

    d = manager.dict()
    l = manager.list(range(10))

    p = Process(target=fun, args=(d, l))
    p.start()
    p.join()

    print(d)
    print(l)
Example #30
    def predict(self, inputData, transientTime=0, update_processor=lambda x: x, verbose=0):
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        manager = Manager()
        predictQueue = manager.Queue()

        # workaround as predict does not support batches atm
        # add dummy dimension to let embedInputData work properly (is optimized to work for batches)
        inputData = inputData.reshape(1, *inputData.shape)
        modifiedInputData = self._embedInputData(inputData)
        modifiedInputData = modifiedInputData[0]
        inputData = inputData[0]

        self.transientTime = transientTime
        self.sharedNamespace.transientTime = transientTime
        predictionOutput = B.zeros(np.insert(self.inputShape, 0, inputData.shape[0] - transientTime))

        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[1:]]),
                        axis=rank).reshape(-1, rank).tolist()
        nJobs = len(jobs)

        self.resetState()

        iterator = PredictionArrayIterator(modifiedInputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_predictProcess,
                    initargs=[predictQueue, self])
        pool.map_async(self._predictProcess, iterator, chunksize=200)#, chunksize=1)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            while nJobsDone < nJobs:
                data = predictQueue.get()
                # result of predicting
                indices, prediction, state = data
                id = self._uniqueIDFromIndices(indices)
                self._xs[id] = state
                # update the values
                predictionOutput[tuple([Ellipsis] + indices)] = prediction

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        pool.close()

        return predictionOutput
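
The predict method shares predictQueue with the pool workers through initializer/initargs rather than as a task argument. A minimal, hypothetical sketch of that mechanism (the work function and sizes are placeholders):

from multiprocessing import Manager, Pool

_progress_q = None

def _init_worker(q):
    global _progress_q                    # runs once per worker; stash the shared queue
    _progress_q = q

def work(x):
    _progress_q.put(x)                    # out-of-band progress report
    return x * x

if __name__ == '__main__':
    manager = Manager()
    q = manager.Queue()
    with Pool(processes=2, initializer=_init_worker, initargs=[q]) as pool:
        async_res = pool.map_async(work, range(5), chunksize=1)
        for _ in range(5):
            print('done:', q.get())       # drain progress while the map runs
        results = async_res.get()
    print(results)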
Example #31
    def fit(self, inputData, outputData, transientTime=0, verbose=0):
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions and rank != self.n_inputDimensions + 1:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        # reshape the input so that it has the shape (timeseries, time, input_dimension^n)
        if rank == self.n_inputDimensions:
            inputData = inputData.reshape(1, *inputData.shape)
            outputData = outputData.reshape(1, *outputData.shape)
        else:
            # modify rank again
            rank -= 1

        partialLength = (inputData.shape[1] - transientTime)
        totalLength = inputData.shape[0] * partialLength
        timeseriesCount = inputData.shape[0]

        manager = Manager()
        fitQueue = manager.Queue()

        modifiedInputData = self._embedInputData(inputData)

        self.sharedNamespace.transientTime = transientTime

        self.sharedNamespace.partialLength = partialLength
        self.sharedNamespace.totalLength = totalLength
        self.sharedNamespace.timeseriesCount = timeseriesCount

        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[2:]]),
                        axis=rank).reshape(-1, rank).tolist()

        nJobs = len(jobs)

        self.resetState()

        iterator = FittingArrayIterator(modifiedInputData, outputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_fitProcess, initargs=[fitQueue, self])
        pool.map_async(self._fitProcess, iterator, chunksize=16)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            while nJobsDone < nJobs:
                data = fitQueue.get()

                # result of fitting
                indices, x, WOut = data
                id = self._uniqueIDFromIndices(indices)

                if WOut is None:
                    import sys
                    print("WARNING: Fit process for pixel {0} did not succeed".format(indices), file=sys.stderr)

                # store WOut
                if self._averageOutputWeights:
                    if WOut is not None:
                        self._WOut += WOut / np.prod(self.inputShape)
                else:
                    self._WOuts[id] = WOut

                    # store x
                self._xs[id] = x

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        pool.close()
Example #32
    def __init__(self, inputShape, n_reservoir,
                 filterSize=1, stride=1, borderMode="mirror", nWorkers="auto",
                 spectralRadius=1.0, noiseLevel=0.0, inputScaling=None,
                 leakingRate=1.0, reservoirDensity=0.2, randomSeed=None, averageOutputWeights=True,
                 out_activation=lambda x: x, out_inverse_activation=lambda x: x,
                 weightGeneration='naive', bias=1.0, outputBias=1.0,
                 outputInputScaling=1.0, inputDensity=1.0, solver='pinv', regressionParameters={}, activation=B.tanh,
                 activationDerivation=lambda x: 1.0 / B.cosh(x) ** 2):

        self._averageOutputWeights = averageOutputWeights
        if averageOutputWeights and solver != "lsqr":
            raise ValueError(
                "`averageOutputWeights` can only be set to `True` when `solver` is set to `lsqr` (Ridge Regression)")

        self._borderMode = borderMode
        if not borderMode in ["mirror", "padding", "edge", "wrap"]:
            raise ValueError(
                "`borderMode` must be set to one of the following values: `mirror`, `padding`, `edge` or `wrap`.")

        self._regressionParameters = regressionParameters
        self._solver = solver

        n_inputDimensions = len(inputShape)

        if filterSize % 2 == 0:
            raise ValueError("filterSize has to be an odd number (1, 3, 5, ...).")
        self._filterSize = filterSize
        self._filterWidth = int(np.floor(filterSize / 2))
        self._stride = stride

        self._n_input = int(np.power(np.ceil(filterSize / stride), n_inputDimensions))

        self.n_inputDimensions = n_inputDimensions
        self.inputShape = inputShape

        if not self._averageOutputWeights:
            self._WOuts = B.empty((np.prod(inputShape), 1, self._n_input + n_reservoir + 1))
            self._WOut = None
        else:
            self._WOuts = None
            self._WOut = B.zeros((1, self._n_input + n_reservoir + 1))
        self._xs = B.empty((np.prod(inputShape), n_reservoir, 1))

        if nWorkers == "auto":
            self._nWorkers = np.max((cpu_count() - 1, 1))
        else:
            self._nWorkers = nWorkers

        manager = Manager()
        self.sharedNamespace = manager.Namespace()
        if hasattr(self, "fitWorkerID") == False or self.parallelWorkerIDs is None:
            self.parallelWorkerIDs = manager.Queue()
            for i in range(self._nWorkers):
                self.parallelWorkerIDs.put((i))

        super(SpatioTemporalESN, self).__init__(n_input=self._n_input, n_reservoir=n_reservoir, n_output=1,
                                                spectralRadius=spectralRadius,
                                                noiseLevel=noiseLevel, inputScaling=inputScaling,
                                                leakingRate=leakingRate, reservoirDensity=reservoirDensity,
                                                randomSeed=randomSeed, out_activation=out_activation,
                                                out_inverse_activation=out_inverse_activation,
                                                weightGeneration=weightGeneration, bias=bias, outputBias=outputBias,
                                                outputInputScaling=outputInputScaling,
                                                inputDensity=inputDensity, activation=activation,
                                                activationDerivation=activationDerivation)
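
The constructor shares state with the workers through a manager Namespace and hands out worker IDs via a manager queue. A small, hypothetical sketch of those two proxies in use:

from multiprocessing import Manager, Process

def worker(ns, ids):
    my_id = ids.get()                     # claim a worker slot
    ns.last_worker = my_id                # Namespace attributes are shared via the proxy

if __name__ == '__main__':
    manager = Manager()
    ns = manager.Namespace()
    ns.last_worker = None
    ids = manager.Queue()
    for i in range(2):
        ids.put(i)
    procs = [Process(target=worker, args=(ns, ids)) for _ in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(ns.last_worker)                 # 0 or 1, depending on scheduling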

        """
Example #33
0
class Storage(object):
    '''
    Storage system
    '''
    def __init__(self):
        # The given page size
        self._PAGE_SIZE = 4096

        # The given size for data blocks
        self._BLOCK_SIZE = 1 * self._PAGE_SIZE

        # Meta data about datasets
        self._dataset_table = {}

        # Read/write head position
        self._position = 0

        # Manager for concurrency
        self.manager = Manager()

        # Job-queue for reading data
        self.job_queue = self.manager.list()

        # Data queueueueuueue
        self.data_queues = self.manager.dict()

        # Path to storage file
        _path = 'data.data'

        # Size of storage (Default 200 mb)
        self._SIZE = 4096 * 256 * 200

        # Amount of blocks
        self._BLOCKS = math.floor(self._SIZE / self._BLOCK_SIZE)

        # Check whether a storage file exists, else create one
        if not os.path.exists(_path):
            print('Writing storage file')
            f = open(_path, 'w+b')
            f.write(b'?' * self._SIZE)
            f.close()

        # Open storage and create a MMAP
        try:
            storage = open(_path, 'a+b')
        except:
            print('Cannot open storage file!')

        # Create MMAP to file
        self.datamap = mmap.mmap(storage.fileno(), 0)

        # Free space vector
        self.free_space =[(0, self._BLOCKS)]


    def _write_data(self, address, data_block, flush=True):
        '''
        Writes a data block to the page at the given address
        '''
        print('¤ Writing data block at ' + str(address))
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address

            # Write the block
            self.datamap.write(bytes(data_block, 'utf-8'))
        except:
            print('! Could not write data block to ' + str(address) + '. Not enough space.')

        # Flush the written data to the file
        if flush:
            try:
                self.datamap.flush()
            except:
                print("Cannot flush data with mmap!")
                pass


    def _read_block(self, address):
        '''
        Writes data to a given address
        '''
        print('+ Reading data from ' + str(address))
        data = ''
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address

            # Read the data
            data = self.datamap.read(self._PAGE_SIZE)
        except:
            print('Could not read data block from ' + str(address))

        return data


    def _worst_fit(self, n_blocks):
        '''
        Data block allocation using worst-fit
        '''
        # Get the largest free segment
        #! Faster to use max-heaps
        largest_segment = max(self.free_space, key=lambda x: x[1])
        blocks_amount = largest_segment[1]

        assert blocks_amount >= n_blocks

        # Construct a list of free datablocks
        free_blocks = []
        current_block = largest_segment[0]
        for _ in range(n_blocks):
            free_blocks.append(current_block)
            current_block += self._BLOCK_SIZE

        # Remove the free space and add the remaining
        # free space after allocation
        self.free_space.remove(largest_segment)
        self.free_space.append((current_block, blocks_amount - n_blocks))

        return free_blocks


    def _request_blocks(self, n_blocks):
        return self._worst_fit(n_blocks)


    def get_size(self, dataset_id):
        '''
        Get the amount of blocks in a dataset
        '''
        return self._dataset_table[dataset_id].size


    def append_data(self, dataset_id, data_block, address, flush=True):
        '''
        Append data to an existing dataset
        '''
        # Check if there is any more allocated space
        # for the dataset
        if self._dataset_table[dataset_id].space_left():
            # Write data block and increament size
            self._write_data(address, data_block, flush)
            self._dataset_table[dataset_id].size+=1
            return address


    def add_dataset(self, dataset_id, dataset, size=None):
        '''
        Add a new dataset to the storage
        '''
        # Add metadata about the dataset
        if size:
            current_size = size
        else:
            current_size = len(dataset)

        self._dataset_table[dataset_id] = Dataset(current_size)

        requested_blocks = self._request_blocks(current_size)

        assert len(requested_blocks) >= len(dataset)

        # Write the data blocks to a file
        block_index = 0
        for data_block in dataset:
            self.append_data(dataset_id, data_block, requested_blocks[block_index], flush=False)
            self._dataset_table[dataset_id].append_block_index(requested_blocks[block_index])
            block_index += 1

        try:
            self.datamap.flush()
        except:
            print("Cannot flush data with mmap!")
            pass

    def read_data(self, dataset_id, data_queue):
        '''
        Run the execution-queue for a given dataset
        '''
        # Generate a random id (6 characters)
        data_id = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(6))

        dataset = self._dataset_table[dataset_id]

        self.data_queues[data_id] = data_queue

        for address in dataset.datablocks:
            self.job_queue.append((address, data_id))

        return dataset.datablocks


    def reader(self):
        '''
        A reading process, which serves data blocks requests from read_data
        '''
        while True:
            # Sort the list of jobs by their address
            jobs = sorted(self.job_queue, key=lambda x: x[0])

            try:
                # Find the job with the closest highest address
                (address, data_id) = next(x for x in jobs if x[0] >= self._position)

                # Read the data from disc
                data = self._read_block(address)

                # Serve data to the requesting process
                self.data_queues[data_id].put(data)

                # Remove the job from the list
                self.job_queue.remove((address, data_id))
            except:
                # No jobs found. Start from position 0.
                self._position = 0
                time.sleep(0.01)
Example #34
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, html, screenshots, graph, nuke, whoxy_limit):
    """
The OSINT toolkit:\n
This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the README for the full details and lists of API keys!

Note: If providing a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    click.clear()
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    verbose = None

    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(report_path, output_report)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)

        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(organization, scope, ip_list,
                                      domain_list, whoxy_limit))
        jobs.append(domain_report)

        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)

        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)

        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)

        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path,
                                         verbose))
            more_jobs.append(files_report)

        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()

        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()

        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()

        report.close_out_reporting()
        print(
            green(
                "[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report)))

        if graph:
            graph_reporter = grapher.Grapher(output_report)
            print(
                green(
                    "[+] Loading ODIN database file {} for conversion to Neo4j"
                ).format(output_report))

            if nuke:
                confirm = input(
                    red("\n[!] You set the --nuke option. This wipes out all nodes \
for a fresh start. Proceed? (Y\\N) "))
                if confirm.lower() == "y":
                    graph_reporter.clear_neo4j_database()
                    print(green("[+] Database successfully wiped!\n"))
                    graph_reporter.convert()
                else:
                    print(
                        red("[!] Then you can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options."))
            else:
                graph_reporter.convert()

        if html:
            print(
                green("\n[+] Creating the HTML report using {}.".format(
                    output_report)))
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()