class MultiThreading(object):
    """Map a function over a data set with a process pool (currently disabled).

    The constructor raises unconditionally, so no instance can be created
    at the moment; the rest of the class is the sketched design.
    """

    def __init__(self, funct, data, threads='all'):
        """Refuse construction: the class is flagged as not functional.

        Everything after the ``raise`` is unreachable.
        """
        raise Exception("Not functionnal yet !")
        self.funct = funct
        nmb_workers = cpu_count() if threads == 'all' else threads
        self.pool = Pool(processes=nmb_workers)
        self.data = data
        self.PG = None
        self.initializer = None
        self.finalizer = None

    def add_progress_counter(self, init_mess="Beginning", end_mess="Done",
                             name_things='things', perc_interv=5):
        """Create a ``ProgressCounter`` sized to the data and register it."""
        counter_options = dict(
            init_mess=init_mess,
            end_mess=end_mess,
            nmb_max=len(self.data),
            name_things=name_things,
            perc_interv=perc_interv,
        )
        self.PG = ProgressCounter(**counter_options)
        self.manager = Manager()
        self.manager.register("PG", self.PG)

    def run(self):
        """Submit the data to the pool, wait for completion, return the async result."""
        # NOTE(review): ``PG_func_wrapper`` is not defined in the visible class.
        async_result = self.pool.map_async(self.PG_func_wrapper, self.data)
        self.pool.close()
        self.pool.join()
        return async_result
def spin_crawl_threads(state, classifiers, MAX_BIT_SIZE, MAX_DL_THREADS, image_path):
    """Start the three crawl worker processes and block until they exit.

    A location generator feeds ``location_q``, the downloader moves items
    from ``location_q`` to ``image_q``, and the classifier consumes
    ``image_q``.  All live child processes are terminated at interpreter exit.
    """
    print("Running threads...")

    shared = Manager()
    location_q = shared.Queue(maxsize=16)
    image_q = shared.Queue(maxsize=64)
    state_lock = shared.Lock()

    generate_location = Process(
        target=generate_location_thread,
        args=(location_q, MAX_BIT_SIZE),
        name="generate_location",
    )
    classification = Process(
        target=classification_thread,
        args=(image_q, classifiers, image_path, state, state_lock),
        name="classification",
    )
    download_image_t = Process(
        target=download_image_thread,
        args=(location_q, image_q, MAX_DL_THREADS),
        name="download_image",
    )

    # Consumers are started before the producer, and joined in the same order.
    launch_order = (download_image_t, classification, generate_location)
    for worker in launch_order:
        worker.start()

    def kill_threads():
        for child in active_children():
            child.terminate()

    atexit.register(kill_threads)

    for worker in launch_order:
        worker.join()
def main():
    """Split the input file by month, run ``maker`` per part, dump the results.

    Each of the 13 parts gets its own single-slot manager queue; the values
    read back from the queues are appended to ``GUESS_PART``, sorted, written
    as one JSON file per month and finally handed to ``write_csv``.
    """
    file_to_attack = './data/example_files/S_hecht_submission_3.csv'
    method_order = 'param'
    nb_element = 1

    month_spliter(file_to_attack)

    manager = Manager()
    queue_list = [manager.Queue(1) for _ in range(13)]
    with Pool(6) as p:
        print("HEY")
        # NOTE(review): two iterables are passed to ``map``; this presumes a
        # pool whose ``map`` accepts several iterables (e.g. pathos) -- confirm.
        p.map(maker, list(range(13)), queue_list)

    GUESS_PART.extend(queue.get() for queue in queue_list)
    GUESS_PART.sort()

    for month in range(NB_MONTH):
        target = "guess_par_t{}.json".format(month)
        with open(target, "w") as jsdump:
            json.dump(GUESS_PART[month][1], jsdump, indent=4)

    write_csv(*[GUESS_PART[part][1] for part in range(13)])
def add_progress_counter(self, init_mess="Beginning", end_mess="Done",
                         name_things='things', perc_interv=5):
    """Create a ``ProgressCounter`` sized to ``self.data`` and register it.

    The counter is stored on ``self.PG``; a fresh manager is stored on
    ``self.manager`` and the counter is registered on it under ``"PG"``.
    """
    counter_options = dict(
        init_mess=init_mess,
        end_mess=end_mess,
        nmb_max=len(self.data),
        name_things=name_things,
        perc_interv=perc_interv,
    )
    self.PG = ProgressCounter(**counter_options)
    self.manager = Manager()
    self.manager.register("PG", self.PG)
def run_parallel(self, test_suites, test_runner, result_type=None,
                 results_path=None):
    """Run every test suite in its own process and summarise the outcome.

    :param test_suites: iterable of suites; one process is started per suite.
    :param test_runner: runner object forwarded to ``self.execute_test``.
    :param result_type: when not ``None``, a report of this type is generated.
    :param results_path: destination path forwarded to the reporter.
    :return: ``1`` if ``self.dump_results`` reports failures or errors,
        otherwise ``0``.
    """
    unittest.installHandler()
    manager = Manager()
    results = manager.dict()
    start = time.time()

    test_mapping = {}
    processes = []
    for test_suite in test_suites:
        # Give each test suite an uuid so it can be
        # matched to the correct test result
        test_id = str(uuid.uuid4())
        test_mapping[test_id] = test_suite
        proc = Process(target=self.execute_test,
                       args=(test_runner, test_id, test_suite, results))
        processes.append(proc)
        proc.start()

    for proc in processes:
        proc.join()

    finish = time.time()
    errors, failures, _ = self.dump_results(start, finish, results)

    if result_type is not None:
        all_results = []
        result_parser = None
        for test_id, result in list(results.items()):
            tests = test_mapping[test_id]
            result_parser = SummarizeResults(vars(result), tests,
                                             (finish - start))
            all_results += result_parser.gather_results()

        # Without any result there is no parser to hand to the reporter;
        # previously this path failed on an unbound ``result_parser``.
        if result_parser is not None:
            reporter = Reporter(result_parser=result_parser,
                                all_results=all_results)
            reporter.generate_report(result_type=result_type,
                                     path=results_path)

    return 1 if failures or errors else 0
def run_parallel(
        self, test_suites, test_runner, result_type=None, results_path=None):
    """Run every test suite in its own process and summarise the outcome.

    :param test_suites: iterable of suites; one process is started per suite.
    :param test_runner: runner object forwarded to ``self.execute_test``.
    :param result_type: when not ``None``, a report of this type is generated.
    :param results_path: destination path forwarded to the reporter.
    :return: ``1`` if ``self.dump_results`` reports failures or errors,
        otherwise ``0``.
    """
    unittest.installHandler()
    manager = Manager()
    results = manager.dict()
    start = time.time()

    test_mapping = {}
    processes = []
    for test_suite in test_suites:
        # Give each test suite an uuid so it can be
        # matched to the correct test result
        test_id = str(uuid.uuid4())
        test_mapping[test_id] = test_suite
        proc = Process(
            target=self.execute_test,
            args=(test_runner, test_id, test_suite, results))
        processes.append(proc)
        proc.start()

    for proc in processes:
        proc.join()

    finish = time.time()
    errors, failures, _ = self.dump_results(start, finish, results)

    if result_type is not None:
        all_results = []
        result_parser = None
        for test_id, result in list(results.items()):
            tests = test_mapping[test_id]
            result_parser = SummarizeResults(
                vars(result), tests, (finish - start))
            all_results += result_parser.gather_results()

        # Without any result there is no parser to hand to the reporter;
        # previously this path failed on an unbound ``result_parser``.
        if result_parser is not None:
            reporter = Reporter(
                result_parser=result_parser, all_results=all_results)
            reporter.generate_report(
                result_type=result_type, path=results_path)

    return 1 if failures or errors else 0
def query(query_lst):
    """Search DBLP for every query string and return the enriched hits.

    All hits are gathered first; then one process per hit normalises the
    author field, attaches the BibTeX entry and stores the hit's ``info``
    in a shared dict keyed by the hit's position.  Returns a plain ``dict``.
    """
    manager = Manager()
    hits = manager.dict()

    results = []
    for q in query_lst:
        response = requests.get(
            'http://dblp.uni-trier.de/search/publ/api',
            params={'q': q, 'h': 100, 'format': 'json'},
        )
        if response.status_code == 429:
            raise Error
        found = response.json()["result"]["hits"].get("hit", None)
        if found is not None:
            results += found

    def _enrich(d, hit, n):
        if hit is None:
            return
        info = hit["info"]
        authors = info.pop("authors")
        if isinstance(authors["author"], dict):
            # Single author: DBLP returns a dict instead of a list.
            info["authors"] = authors["author"]["text"]
        else:
            info["authors"] = [fullname(a["text"]) for a in authors["author"]]
        info["bibtex"] = get_bib(info["key"])
        d[n] = info

    job = [
        Process(target=_enrich, args=(hits, hit, n))
        for n, hit in enumerate(results)
    ]
    for worker in job:
        worker.start()
    for worker in job:
        worker.join()
    return dict(hits)
def get_city_states(self):
    """
    Creates city states from start time to end time.

    Builds a time index between ``self.start_time`` and ``self.end_time``
    at ``self.time_unit_duration`` minute steps, keeps only the configured
    business days and hours, and calls ``self.get_city_state`` for every
    remaining index position through a process pool.  Workers receive
    ``[shared_dict, position]``.

    :return: plain ``dict`` copy of the shared dict the workers were given
    """
    start_time = self.start_time
    end_time = self.end_time

    # Create array of time slice values between the start and end time
    creator_config = self.config['city_state_creator']
    business_days = creator_config['business_days']
    business_hours_start = creator_config['business_hours_start']
    business_hours_end = creator_config['business_hours_end']
    index = pd.date_range(start=start_time,
                          end=end_time,
                          freq=str(self.time_unit_duration) + 'min')

    # Filter only the required days and hours
    index = index[index.day_name().isin(business_days)]
    index = index[(index.hour >= business_hours_start)
                  & (index.hour <= business_hours_end)]
    half_slice = timedelta(minutes=self.time_slice_duration / 2)
    time_slice_starts = index - half_slice
    time_slice_ends = index + half_slice

    # Create arguments dictionary for parallelization
    self.parallel_args = self.create_parallel_args(index,
                                                   time_slice_starts,
                                                   time_slice_ends)

    # Create city states
    manager = Manager()
    city_states = manager.dict()
    N = len(index.values)

    # Create parallel pool
    self.logger.info("Creating parallelization pool")
    pool = ProcessPool(nodes=25)
    # ``range`` instead of ``xrange``: the latter does not exist on Python 3.
    pool.map(self.get_city_state, ([city_states, t] for t in range(N)))
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished creating city states")

    return dict(city_states)
def fit(self, X, Y):
    """Train one binary classifier per label, spread over ``self.n_jobs`` processes.

    The distinct labels found in ``Y`` are split into ``self.n_jobs`` groups;
    each group is handled by one process that reports through a shared
    queue.  One queue item is read per label and the collected items are
    stored as ``self.classifiers``.
    """
    assert not self.fit_done
    assert len(X) == len(Y)

    possible_labels = list({label for labels in Y for label in labels})
    job_labels = np.array_split(possible_labels, self.n_jobs)

    with Manager() as manager:
        X_proxy = manager.list(X)
        Y_proxy = manager.list(Y)
        output_queue = Queue()

        processes = []
        for job in job_labels:
            task_kwargs = [{
                'X': X_proxy,
                'Y': Y_proxy,
                'label': lbl,
                'return_label': True
            } for lbl in job]
            processes.append(
                Process(target=sequential_execute,
                        args=(output_queue, get_binary_clf_from_multilabel,
                              task_kwargs)))

        for worker in processes:
            worker.start()

        # needs to be flattened
        results = []
        for _ in possible_labels:
            results.append(output_queue.get())

        for worker in processes:
            worker.join()

    self.classifiers = dict(results)
    self.fit_done = True
def ospf_check():
    """Run ``_inf_ospf_check`` for every device listed in ``devicesFile``.

    The first comma-separated field of each line is taken as the device.
    All workers share one manager lock, passed as the first argument.
    """
    clear_log()
    # Read the device list inside a context manager so the handle is closed.
    with open(devicesFile) as device_list:
        devices = [line.split(',')[0] for line in device_list]
    pool = Pool(processor)
    lock = Manager().Lock()
    list(pool.map(partial(_inf_ospf_check, lock), devices))
    pool.close()
    pool.join()
def run_post_process():
    """Start ``cpu_count() - 2`` processes that each consume one record generator.

    Every generator is built from the same lock, shared dict and ES client;
    the function returns once all processes have finished.
    """
    es = ES(FLAGS.configfile_name)
    manager = Manager()
    lock = manager.Lock()
    shared_dict = manager.dict({'time': 0, "id": ""})
    process_num = int(cpu_count() - 2)

    generator_list = [
        _generator(lock, shared_dict, es) for _ in range(process_num)
    ]

    workers = []
    for record_source in generator_list:
        worker = Process(target=_process_unknown_record, args=(record_source,))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()
def create_csv(self):
    """Write the two CSV headers, then parse every zip through a process pool.

    Does nothing unless the defining module runs as ``__main__``.
    ``self.parse_Zip`` is called with a shared manager lock and the index of
    each entry in ``self.list_of_zips``.
    """
    if __name__ != '__main__':
        return

    t1 = time()

    # Context managers guarantee both header files are flushed and closed,
    # even if creating the second one fails.
    with open(self.out_csv1, "w") as file1:
        file1.write("id" + ',' + "level" + '\n')
    with open(self.out_csv2, "w") as file2:
        file2.write("id" + ',' + "object_name" + '\n')

    indices = range(len(self.list_of_zips))
    p = Pool()
    m = Manager()
    l = m.Lock()
    func = partial(self.parse_Zip, l)
    p.map(func, indices)
    p.close()
    p.join()

    print('Create .csv files time = ' + str(time() - t1) + 's')
def download_image_thread(location_q, image_q, MAX_DL_THREADS=10):
    """Forever take locations from ``location_q`` and download them via a pool.

    Each download puts ``(location, image)`` on ``image_q``.  A semaphore
    with ``2 * MAX_DL_THREADS`` slots bounds the number of running plus
    scheduled downloads.  This function never returns.
    """
    print("Running Download Image Thread.")

    max_processes = MAX_DL_THREADS
    print("Creating a thread pool of size {} for downloading images...".format(max_processes))
    pool = Pool(processes=max_processes)

    # Allow us to have n processes running, and n processes scheduled to run
    # TODO: Manager is not necessary here, but is used to get around the fact
    # that thread-safe objects cannot be passed by reference, they must be
    # inherited. A more lightweight solution should be found
    workers = Manager().Semaphore(max_processes*2)

    def async_download(location):
        try:
            image = download_image(location)
            image_q.put((location, image), True)
        finally:
            # Always give the slot back; otherwise every failed download
            # permanently consumes one slot and the loop below stalls.
            workers.release()

    while True:
        location = location_q.get(True)
        workers.acquire()
        pool.apply_async(async_download, (location,))
def folderbase_cut_silence(input_folder, cut_interval):
    """Strip silence from the wav files found under ``input_folder``.

    Output goes to ``<input_folder>/remove_silence``.  Each successfully
    processed source file is deleted; the module-level counters track
    processed, generated and failed files, and a summary is logged.
    """
    output_no_silence = os.path.join(input_folder, "remove_silence")
    # if not os.path.exists(output_folder):
    #     os.mkdir(output_folder)
    if not os.path.exists(output_no_silence):
        os.mkdir(output_no_silence)

    # Only the bare file names are collected, whatever directory they are in.
    wav_files = [
        name
        for _root, _dirs, names in os.walk(input_folder)
        for name in names
    ]

    def process_files(lock, file):
        try:
            #exclude log.txt file
            if not re.search(".+\.wav", file):
                return
            wave_file = os.path.join(input_folder, file)
            wo_num = cut_wav_without_silence(wave_file, output_no_silence,
                                             cut_interval)
            with cut_silence_file_num.get_lock():
                cut_silence_file_num.value += 1
            with cut_silence_out_file_num.get_lock():
                cut_silence_out_file_num.value += wo_num
            os.remove(wave_file)
        except Exception as e:
            logging.info(e)
            with cut_silence_fail_file.get_lock():
                cut_silence_fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    # NOTE(review): two iterables are passed to ``map``; this presumes a pool
    # whose ``map`` accepts several iterables (e.g. pathos) -- confirm.
    pool.map(process_files, locks, wav_files)

    summary_template = '''Total number of audio files processed is {}, generated {} files and {} files failed '''
    loginfo = summary_template.format(cut_silence_file_num.value,
                                      cut_silence_out_file_num.value,
                                      cut_silence_fail_file.value)
    logging.info(loginfo)
def __init__(self, initial_patterns_list, input_text_file, data_source_type,
             read_file_format='rb'):
    """Index the initial patterns in a shared dict and open the data source.

    Patterns are wrapped in ``PatternData`` and stored in a manager dict
    keyed by their ``formatted`` string.  The shortest and longest pattern
    length, in whitespace-separated words, are recorded as well.
    """
    self.read_format = read_file_format
    self.pattern_to_data = {}  #defaultdict(PatternData)

    by_formatted = {}
    for raw_pattern in initial_patterns_list:
        data = PatternData(raw_pattern)
        by_formatted[data.formatted] = data
    self.pattern_to_data = Manager().dict(by_formatted)

    word_counts = [len(key.split()) for key in self.pattern_to_data.keys()]
    self.min_pattern_len = min(word_counts)
    self.max_pattern_len = max(word_counts)

    self.data_wrapper = data_wrapper_factory(input_text_file,
                                             data_source_type)
def zte_gpon_svlan_check():
    """Collect SVLAN data for every OLT that has a ``GTGO`` card.

    For each ``(ip, slots)`` row the result of ``zte_gpon_svlan`` is passed
    to ``svlan_entry`` together with a shared manager lock, using 8 workers.
    """
    clear_log()
    rows = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)"
    )
    olts = ((row[0], row[1]) for row in rows)

    def lzte_gpon_svlan(olt):
        return zte_gpon_svlan(ip=olt[0], slots=olt[1])

    pool = Pool(8)
    lock = Manager().Lock()
    record_entry = partial(svlan_entry, lock)
    list(pool.map(compose(record_entry, lzte_gpon_svlan), olts))
    pool.close()
    pool.join()
def svlan_check():
    """Collect SVLAN data for every ``Olt`` node using 16 workers.

    Each ``(ip, company, area)`` tuple goes through ``get_svlan`` and the
    result is passed to ``svlan_entry`` together with a shared manager lock.
    """
    clear_log()
    # Alternative node selections kept for debugging:
    # nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    # nodes = graph.find('Olt', property_key='company', property_value='zte')
    nodes = graph.find('Olt')
    olts = [(node['ip'], node['company'], node['area']) for node in nodes]
    # list(map(compose(card_entry, get_card), olts))
    pool = Pool(16)
    lock = Manager().Lock()
    pipeline = compose(partial(svlan_entry, lock), get_svlan)
    list(pool.map(pipeline, olts))
    pool.close()
    pool.join()
def interface_check_m():
    """Collect interface data for every switch using 16 workers.

    Each ``(ip, model)`` tuple goes through ``get_interface`` and the result
    is passed to ``output_interface_m`` together with a shared manager lock.
    """
    clear_log()
    # Alternative queries kept for debugging:
    # cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    # cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    cmd = "match(s: Switch) return s.ip, s.model"
    rows = graph.cypher.execute(cmd)
    switchs = [(row[0], row[1]) for row in rows]
    pool = Pool(16)
    lock = Manager().Lock()
    pipeline = compose(partial(output_interface_m, lock), get_interface)
    list(pool.map(pipeline, switchs))
    pool.close()
    pool.join()
def __init__(self):
    """Set up block-storage bookkeeping and memory-map the storage file.

    Creates ``data.data`` (filled with ``b'?'``) when it does not exist,
    maps the whole file into ``self.datamap`` and initialises the free-space
    list to a single extent covering all blocks.

    :raises IOError, OSError: when the storage file cannot be opened.
    """
    # The given page size
    self._PAGE_SIZE = 4096
    # The given size for data blocks
    self._BLOCK_SIZE = 1 * self._PAGE_SIZE
    # Meta data about datasets
    self._dataset_table = {}
    # Read/write head position
    self._position = 0
    # Manager for concurrency
    self.manager = Manager()
    # Job-queue for reading data
    self.job_queue = self.manager.list()
    # Data queues
    self.data_queues = self.manager.dict()
    # Path to storage file
    _path = 'data.data'
    # Size of storage (Default 200 mb)
    self._SIZE = 4096 * 256 * 200
    # Amount of blocks
    self._BLOCKS = math.floor(self._SIZE / self._BLOCK_SIZE)

    # Check whether a storage file exists, else create one
    if not os.path.exists(_path):
        print('Writing storage file')
        # The context manager closes (and flushes) the file; the original
        # ``f.close`` lacked parentheses and never ran.
        with open(_path, 'w+b') as f:
            f.write(b'?' * self._SIZE)

    # Open storage and create a MMAP
    try:
        storage = open(_path, 'a+b')
    except (IOError, OSError):
        print('Cannot open storage file!')
        # Re-raise: nothing below can work without the file.
        raise

    # Create MMAP to file; the mapping stays valid after the file object
    # is closed.
    with storage:
        self.datamap = mmap.mmap(storage.fileno(), 0)

    # Free space vector
    self.free_space = [(0, self._BLOCKS)]
def folderbase_convert_to_wave(webmfolder, wavefolder):
    """Convert every non-wav file yielded by ``mp3gen(webmfolder)`` to wav.

    Converted sources are deleted; sources that fail are moved to
    ``data/convert_failed``.  Module-level counters track totals, and a
    summary line is logged at the end.
    """

    def process_convert(lock, filename):
        my_logger.debug("filename is {}".format(filename))
        with total_num.get_lock():
            total_num.value += 1
        try:
            success = convert_to_wav(filename, wavefolder)
            with success_num.get_lock():
                success_num.value += success
            os.remove(filename)
        except Exception as e:
            fields = [str(datetime.datetime.now()), filename, str(e)]
            my_logger.info("\t".join(fields))
            fail_folder = "data/convert_failed"
            if not os.path.exists(fail_folder):
                os.mkdir(fail_folder)
            failed_file = os.path.join(fail_folder,
                                       os.path.basename(filename))
            os.rename(filename, failed_file)
            with fail_num.get_lock():
                fail_num.value += 1
        return 1

    # Skip anything whose path already mentions "wav".
    filenames = [
        path for path in mp3gen(webmfolder) if not re.search("wav", path)
    ]

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(filenames)
    # NOTE(review): two iterables are passed to ``map``; this presumes a pool
    # whose ``map`` accepts several iterables (e.g. pathos) -- confirm.
    pool.map(process_convert, locks, filenames)

    summary = "{}/{} files successfully converted to wave and {} files failed".format(
        success_num.value, total_num.value, fail_num.value)
    my_logger.info(summary)
def hostname_check():
    """Collect host names for all OLTs and store them on the graph nodes.

    Each ``(ip, company)`` tuple goes through ``get_hostname`` and the result
    is passed to ``hostname_entry`` with a shared manager lock, using 16
    workers.  Afterwards every line of ``result_file`` is split on commas and
    its first two fields update the matching ``Olt`` node.
    """
    clear_log()
    nodes = graph.find('Olt')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()

    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    # Read the results inside a context manager so the file is closed, and
    # use a plain loop since the calls are made only for their side effects.
    with open(result_file) as results:
        for line in results:
            fields = line.split(',')
            graph.cypher.execute(cmd, ip=fields[0], hostname=fields[1])
def zhongji_check():
    """Collect trunk data for all OLTs and merge it into the graph.

    Each ``(ip, company)`` tuple goes through ``get_zhongji`` and the result
    is passed to ``zhongji_entry`` with a shared manager lock, using 16
    workers.  Afterwards every line of ``result_file`` is split on commas and
    its first three fields are merged as ``Etrunk``/``Port`` nodes under the
    matching ``Olt``.
    """
    clear_log()
    nodes = graph.find('Olt')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()

    cmd = """match(n: Olt) where n.ip = {ip} merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) merge(m) - [:Include]->(p: Port{name: {interface}})"""
    # Read the results inside a context manager so the file is closed, and
    # use a plain loop since the calls are made only for their side effects.
    with open(result_file) as results:
        for line in results:
            fields = line.split(',')
            graph.cypher.execute(
                cmd, ip=fields[0], sm=fields[1], interface=fields[2])
def add_power_info():
    """Collect power information for every switch whose SNMP state is normal.

    The per-model getter is chosen through ``_model`` and the ``funcs``
    table; its result is passed to ``_add_power_info`` together with a
    shared manager lock.
    """
    funcs = {
        'S8508': S85.get_power_info,
        'S8505': S85.get_power_info,
        'T64G': T64.get_power_info,
        'S8905': S89.get_power_info,
        'S8905E': S8905E.get_power_info,
        'S9306': S93.get_power_info,
        'S9303': S93.get_power_info
    }
    get_power_info = partial(_model, funcs)
    # clear_log()
    rows = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model"
    )
    switches = [{'ip': row['ip'], 'model': row['model']} for row in rows]
    pool = Pool(processor)
    lock = Manager().Lock()
    pipeline = compose(partial(_add_power_info, lock), get_power_info)
    list(pool.map(pipeline, switches))
    pool.close()
    pool.join()
def sample(self, n_samples: int, beta: float = 1.):
    """Run ``n_samples`` sampling rounds across all chains in parallel.

    Every chain gets one work queue and one return queue (both of size 1).
    Each round puts ``(idx, state, beta, False)`` on every work queue, reads
    one ``(idx, sample, beta)`` tuple from every return queue, then calls
    ``self.swap_samples`` and ``self.adjust_betas``.  After the last round a
    final tuple ending in ``True`` is put on every work queue and the
    sampler objects returned by the workers replace ``self.samplers``.
    The pool is closed and joined at the end.

    :param n_samples: number of rounds; converted with ``int()``.
    :param beta: rebound by the loops below before it is ever read.
    """
    with Manager() as mgr:
        queues_work = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]
        queues_return = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]

        # Hand each worker its queues, chain index and sampler.
        _ = self.parallel_pool.starmap(
            func=worker_init,
            iterable=[(queues_work[idx], queues_return[idx], idx, self.samplers[idx])
                      for idx in range(self.num_chains)])

        # One asynchronous run per chain; results are collected at the end.
        worker_results = [self.parallel_pool.apply_async(func=worker_run)
                          for _ in range(self.num_chains)]

        swapped = [None for _ in self.samplers]
        last_samples = [None for _ in self.samplers]
        for i_sample in tqdm(range(int(n_samples))):
            logger.debug('MAIN PROCESS: deploying work...')
            for idx, beta in enumerate(self.betas):
                queues_work[idx].put((idx, copy.deepcopy(swapped[idx]), beta, False))  # sample
            logger.debug('MAIN PROCESS: waiting for return...')
            for idx in range(len(self.samplers)):
                # ``idx`` is rebound to the index reported by the worker.
                idx, last_sample, beta = queues_return[idx].get()  # get sample
                last_samples[idx] = last_sample
            logger.debug('MAIN PROCESS: swapping samples...')
            swapped = self.swap_samples(last_samples)  # swap samples
            logger.debug('MAIN PROCESS: swapping samples...')
            self.adjust_betas(i_sample, swapped, last_samples)  # adjust temps
        # logger.debug('stopping workers...')
        # Final message per chain; presumably the trailing ``True`` is the
        # stop flag -- confirm against ``worker_run``.
        _ = [queues_work[idx].put((idx, None, 0.00, True)) for idx in range(self.num_chains)]
        _ = [queues_work[idx].join() for idx in range(self.num_chains)]
        # logger.debug('reached getting from finalqueue')
        for worker_result in worker_results:
            idx, sampler_obj = worker_result.get()
            logger.debug(f'GATHERED sampler {idx} trace_x: {len(sampler_obj.trace_x)}')
            self.samplers[idx] = sampler_obj
        self.parallel_pool.close()
        self.parallel_pool.join()
def add_traffics():
    """Collect traffic data for every switch whose SNMP state is normal.

    The per-model getter is chosen through ``_model`` and the ``funcs``
    table; its result is passed to ``_add_traffics`` together with a shared
    manager lock.
    """
    funcs = {
        'S8508': S85.get_traffics,
        'S8505': S85.get_traffics,
        'T64G': T64.get_traffics,
        'S8905': S89.get_traffics,
        'S8905E': S8905E.get_traffics,
        'S9306': S93.get_traffics,
        'S9303': S93.get_traffics
    }
    get_traffics = partial(_model, funcs)
    # clear_log()
    rows = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model"
    )
    switchs = [
        {'ip': row['ip'], 'infs': row['infs'], 'model': row['model']}
        for row in rows
    ]
    pool = Pool(processor)
    lock = Manager().Lock()
    pipeline = compose(partial(_add_traffics, lock), get_traffics)
    list(pool.map(pipeline, switchs))
    pool.close()
    pool.join()
def folderbase_cut_interval(input_folder, output_folder, cut_period):
    """Cut every wav file under ``input_folder`` into ``cut_period`` pieces.

    Pieces are written to ``output_folder`` as ``<basename>_<index>.wav`` and
    the source file is removed afterwards.  Module-level counters track the
    processed, generated and failed files; a summary is logged at the end.
    """
    wav_files = []
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    for root, dirs, files in os.walk(input_folder):
        for filename in files:
            wav_files.append(os.path.join(root, filename))

    # for file in wav_files:
    def process_files(lock, file):
        # ``lock`` is accepted but not used; the counters carry their own locks.
        try:
            if re.search(".+\.wav", file):
                with file_num.get_lock():
                    file_num.value += 1
                filebasename = os.path.basename(file)
                filebasename, _ = os.path.splitext(filebasename)
                #get audio properties
                audio_prop = {}
                with wave.open(file, mode='rb') as newAudio:
                    audio_prop["nchannels"] = newAudio.getnchannels()
                    audio_prop["nframes"] = newAudio.getnframes()
                    audio_prop["sampwidth"] = newAudio.getsampwidth()
                    audio_prop["framerate"] = newAudio.getframerate()
                    audio_prop["comptype"] = newAudio.getcomptype()
                    audio_prop["compname"] = newAudio.getcompname()
                # Duration in seconds: frame count divided by frame rate.
                audio_duration = audio_prop["nframes"] / audio_prop["framerate"]
                precut_duration = cut_period
                cut_start = 0
                cut_return = 0
                cut_num = 0
                index = 0
                # Step through the audio in ``cut_period`` windows.
                while cut_start < audio_duration:
                    cut_end = cut_start + precut_duration
                    cut_audio, cutaudio_prop = cut_wave(file,
                                                        cut_start,
                                                        cut_end,
                                                        start_bias=0,
                                                        end_bias=0)
                    newfile = os.path.join(
                        output_folder,
                        filebasename + "_" + str(index) + ".wav")
                    index += 1
                    with wave.open(newfile, "wb") as newAudio:
                        newAudio.setparams((cutaudio_prop["nchannels"],
                                            cutaudio_prop["sampwidth"],
                                            cutaudio_prop["framerate"],
                                            cutaudio_prop["nframes"],
                                            cutaudio_prop["comptype"],
                                            cutaudio_prop["compname"]))
                        newAudio.writeframes(cut_audio)
                    cut_start = cut_start + precut_duration
                    with out_file_num.get_lock():
                        out_file_num.value += 1
                os.remove(file)
        except Exception as e:
            logging.info(e)
            with fail_file.get_lock():
                fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    # NOTE(review): two iterables are passed to ``map``; this presumes a pool
    # whose ``map`` accepts several iterables (e.g. pathos) -- confirm.
    pool.map(process_files, locks, wav_files)
    loginfo = '''Total number of audio files processed is {}, generated {} files and {} files failed '''.format(file_num.value, out_file_num.value,
                     fail_file.value)
    logging.info(loginfo)
# Settings: Location, Units, and rapidfire (optional) latitude = check_env_var("HZN_LAT", printerr=True) longitude = check_env_var("HZN_LON", printerr=True) pws_units = check_env_var("PWS_UNITS", default='us', printerr=True) # weewx recommends only using 'us' pws_wu_loc = check_env_var("PWS_WU_LOC", default='', printerr=True) pws_wu_rapidfire = check_env_var("PWS_WU_RPDF", default='False', printerr=True) # Deal with a potential lower-case (boolean value from Horizon) or erroneous value if pws_wu_rapidfire == "true" or pws_wu_rapidfire == "True": pws_wu_rapidfire = "True" else: pws_wu_rapidfire = "False" ## Shared data structure (dict for flask server to read & serve) manager = Manager() sdata = manager.dict() standard_params = ["wu_id", "stationtype", "model", "latitude", "longitude", "units", "location"] standard_values = [pws_wu_id, pws_station_type, pws_model, latitude, longitude, pws_units, pws_wu_loc] sdata["r"] = dict(zip(["status"], ["Station initializing..."])) sdata["t"] = str(int(time.time())) # Timestamp sdata["i"] = dict(zip(standard_params, standard_values)) # Station Info ## Flask HTTPserver ---------------------------------------------------------- ## Start simple flask server at localhost:port and pass in shared data dict p_flask = Process(target=fl.run_server, args=('0.0.0.0', 8357, sdata)) p_flask.start() ## Weewx service ------------------------------------------------------------- # Modify the weewx configuration file with our env var settings weemod = weewx_mod(weewx_config_file, pws_station_type)
def osint(self,organization,domain,files,ext,scope_file,aws,aws_fixes,html,
          screenshots,graph,nuke,whoxy_limit,typo,unsafe):
    """
    The OSINT toolkit:

    This is ODIN's primary module. ODIN will take the tagret organization, domain, and other data
    provided and hunt for information. On the human side, ODIN looks for employee names,
    email addresses, and social media profiles. Names and emails are cross-referenced with
    HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

    ODIN also uses various tools and APIs to collect information on the provided IP addresses
    and domain names, including things like DNS and IP address history.

    View the wiki for the full details, reporting information, and lists of API keys.

    Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:

        * Single Address: 8.8.8.8

        * Basic CIDR: 8.8.8.0/24

        * Nmap-friendly Range: 8.8.8.8-10

        * Underscores? OK: 8.8.8.8_8.8.8.10
    """
    click.clear()
    click.secho(asciis.print_art(),fg="magenta")
    click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
    click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.",fg="green")
    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"
    # Everything below only runs when this module is executed as a script.
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        rev_domain_list = manager.list()
        # Create reporter object and generate lists of everything, just IP addresses, and just domains
        browser = helpers.setup_headless_chrome(unsafe)
        report = reporter.Reporter(organization,report_path,output_report,browser)
        report.create_tables()
        # ``ip_list`` and ``domain_list`` are rebound to what prepare_scope returns.
        scope,ip_list,domain_list = report.prepare_scope(ip_list,domain_list,scope_file,domain)
        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        # Phase 1 jobs
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain,))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list,rev_domain_list,organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Hunter",
                                target=report.create_domain_report_table,
                                args=(organization,scope,ip_list,domain_list,rev_domain_list,whoxy_limit))
        jobs.append(domain_report)
        # Phase 2 jobs
        shodan_report = Process(name="Shodan Hunter",
                                target=report.create_shodan_table,
                                args=(ip_list,domain_list))
        more_jobs.append(shodan_report)
        # The next three phase 2 jobs are only queued when their option is set.
        if typo:
            lookalike_report = Process(name="Lookalike Domain Reviewer",
                                       target=report.create_lookalike_table,
                                       args=(organization,domain))
            more_jobs.append(lookalike_report)
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path,browser))
            more_jobs.append(take_screenshots)
        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_metadata_table,
                                   args=(domain,ext,report_path))
            more_jobs.append(files_report)
        # Phase 3 jobs
        cloud_report = Process(name="Cloud Hunter",
                               target=report.create_cloud_table,
                               args=(organization,domain,aws,aws_fixes))
        even_more_jobs.append(cloud_report)
        # Process the lists of jobs in phases, starting with phase 1
        click.secho("[+] Beginning initial discovery phase! This could take some time...",fg="green")
        for job in jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in jobs:
            job.join()
        # Wait for phase 1 and then begin phase 2 jobs
        click.secho("[+] Initial discovery is complete! Proceeding with additional queries...",fg="green")
        for job in more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in more_jobs:
            job.join()
        # Wait for phase 2 and then begin phase 3 jobs
        click.secho("[+] Final phase: checking the cloud and web services...",fg="green")
        for job in even_more_jobs:
            click.secho("[+] Starting new process: {}".format(job.name),fg="green")
            job.start()
        for job in even_more_jobs:
            job.join()
        # All jobs are done, so close out the SQLIte3 database connection
        report.close_out_reporting()
        click.secho("[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report),fg="green")
        # Perform additional tasks depending on the user's command line options
        if graph:
            graph_reporter = grapher.Grapher(output_report)
            click.secho("[+] Loading ODIN database file {} for conversion to Neo4j".format(output_report),fg="green")
            if nuke:
                # Wiping the graph database requires explicit confirmation.
                if click.confirm(click.style("[!] You set the --nuke option. This wipes out all nodes for a \
fresh start. Proceed?",fg="red"),default=True):
                    try:
                        graph_reporter.clear_neo4j_database()
                        click.secho("[+] Database successfully wiped!\n",fg="green")
                    except Exception as error:
                        click.secho("[!] Failed to clear the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                        click.secho("L.. Details: {}".format(error),fg="red")
                else:
                    click.secho("[!] You can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options.",fg="red")
            try:
                graph_reporter.convert()
            except Exception as error:
                click.secho("[!] Failed to convert the database! Check the Neo4j console and \
your configuration and try running grapher.py again.",fg="red")
                click.secho("L.. Details: {}".format(error),fg="red")
        if html:
            click.secho("\n[+] Creating the HTML report using {}.".format(output_report),fg="green")
            try:
                html_reporter = htmlreporter.HTMLReporter(organization,report_path + "/html_report/",output_report)
                html_reporter.generate_full_report()
            except Exception as error:
                click.secho("[!] Failed to create the HTML report!",fg="red")
                click.secho("L.. Details: {}".format(error),fg="red")
from multiprocess import Manager, Process


def fun(d, l):
    """Fill the shared dict ``d`` with three entries and reverse ``l`` in place."""
    d[1] = '1'
    d[2] = 2
    d[0.25] = None
    l.reverse()


if __name__ == '__main__':
    # Demonstrates that a child process can mutate manager-backed containers.
    manager = Manager()

    d = manager.dict()
    l = manager.list(range(10))

    p = Process(target=fun, args=(d, l))
    p.start()
    p.join()

    # ``print(x)`` with one argument works on Python 2 and 3 alike; the
    # original ``print x`` statement is a syntax error on Python 3.
    print(d)
    print(l)
def predict(self, inputData, transientTime=0, update_processor=lambda x: x, verbose=0):
    """Predict the output for ``inputData``, one pool job per spatial position.

    Workers report ``(indices, prediction, state)`` through a manager queue.
    The parent stores each state in ``self._xs`` and writes each prediction
    into the returned array.

    :param inputData: array whose rank must equal ``n_inputDimensions + 1``.
    :param transientTime: number of leading time steps excluded from the output.
    :param update_processor: not referenced in this method body.
    :param verbose: ``> 0`` shows a progress bar, ``> 1`` also prints the job count.
    :return: the filled prediction array.
    :raises ValueError: if ``inputData`` has an unsuitable number of dimensions.
    """
    rank = len(inputData.shape) - 1

    if rank != self.n_inputDimensions:
        raise ValueError(
            "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                self.n_inputDimensions))

    manager = Manager()
    predictQueue = manager.Queue()

    # workaround as predict does not support batches atm
    # add dummy dimension to let embedInputData work properly (is optimized to work for batches)
    inputData = inputData.reshape(1, *inputData.shape)
    modifiedInputData = self._embedInputData(inputData)
    modifiedInputData = modifiedInputData[0]
    inputData = inputData[0]

    self.transientTime = transientTime
    self.sharedNamespace.transientTime = transientTime

    predictionOutput = B.zeros(np.insert(self.inputShape, 0, inputData.shape[0] - transientTime))

    # One job per index combination of the non-time axes, shifted by the filter width.
    jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[1:]]),
                    axis=rank).reshape(-1, rank).tolist()
    nJobs = len(jobs)

    self.resetState()

    iterator = PredictionArrayIterator(modifiedInputData, jobs, self._filterWidth, self._stride, self)

    pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_predictProcess,
                initargs=[predictQueue, self])
    # Fire and forget: results arrive through ``predictQueue``, not the async result.
    pool.map_async(self._predictProcess, iterator, chunksize=200)#, chunksize=1)

    def _processPoolWorkerResults():
        nJobsDone = 0

        if verbose > 0:
            bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
            bar.update(0)

        # Block until exactly one result per job has been received.
        while nJobsDone < nJobs:
            data = predictQueue.get()

            # result of predicting
            indices, prediction, state = data
            id = self._uniqueIDFromIndices(indices)
            self._xs[id] = state
            # update the values
            predictionOutput[tuple([Ellipsis] + indices)] = prediction

            nJobsDone += 1
            if verbose > 0:
                bar.update(nJobsDone)
                if verbose > 1:
                    print(nJobsDone)

        if verbose > 0:
            bar.finish()

    _processPoolWorkerResults()

    # NOTE(review): the pool is closed but never joined.
    pool.close()

    return predictionOutput
def fit(self, inputData, outputData, transientTime=0, verbose=0):
    """Fit in parallel, one job per index position, and store the results.

    ``inputData`` may have ``self.n_inputDimensions + 1`` dimensions (a
    single series, which gets a leading axis of length 1 added) or
    ``self.n_inputDimensions + 2`` dimensions (several series).  One job is
    created for every index tuple over the axes after the first two and
    dispatched to a worker pool.  Each worker result ``(indices, x, WOut)``
    arrives on a manager queue; ``x`` is stored in ``self._xs`` and ``WOut``
    is either accumulated into ``self._WOut`` (when
    ``self._averageOutputWeights`` is set) or stored in ``self._WOuts``.

    Args:
        inputData: input array, see above for the accepted ranks.
        outputData: target array; reshaped together with ``inputData`` in
            the single-series case and handed to the job iterator.
        transientTime: subtracted from ``inputData.shape[1]`` to obtain the
            per-series length published on ``self.sharedNamespace``.
        verbose: ``> 0`` shows a progress bar, ``> 1`` additionally prints
            the running job count.

    Raises:
        ValueError: if ``inputData`` has an unsupported number of dimensions.
    """
    import sys  # hoisted: was imported inside the result loop

    rank = len(inputData.shape) - 1

    if rank != self.n_inputDimensions and rank != self.n_inputDimensions + 1:
        raise ValueError(
            "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                self.n_inputDimensions))

    # reshape the input so that it has the shape (timeseries, time, input_dimension^n)
    if rank == self.n_inputDimensions:
        inputData = inputData.reshape(1, *inputData.shape)
        outputData = outputData.reshape(1, *outputData.shape)
    else:
        # modify rank again
        rank -= 1

    partialLength = (inputData.shape[1] - transientTime)
    totalLength = inputData.shape[0] * partialLength
    timeseriesCount = inputData.shape[0]

    manager = Manager()
    fitQueue = manager.Queue()

    modifiedInputData = self._embedInputData(inputData)

    self.sharedNamespace.transientTime = transientTime
    self.sharedNamespace.partialLength = partialLength
    self.sharedNamespace.totalLength = totalLength
    self.sharedNamespace.timeseriesCount = timeseriesCount

    # One job per index tuple; indices are shifted by the filter width.
    jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[2:]]),
                    axis=rank).reshape(-1, rank).tolist()
    nJobs = len(jobs)

    self.resetState()

    iterator = FittingArrayIterator(modifiedInputData, outputData, jobs, self._filterWidth, self._stride, self)

    pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_fitProcess,
                initargs=[fitQueue, self])
    # NOTE(review): the AsyncResult is discarded, so an exception raised in a
    # worker is not re-raised here and the blocking get() below would wait for
    # a result that never arrives -- confirm workers always put a result.
    pool.map_async(self._fitProcess, iterator, chunksize=16)

    def _processPoolWorkerResults():
        nJobsDone = 0

        if verbose > 0:
            bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
            bar.update(0)

        while nJobsDone < nJobs:
            # result of fitting
            indices, x, WOut = fitQueue.get()

            # named stateIndex rather than `id` to avoid shadowing the builtin
            stateIndex = self._uniqueIDFromIndices(indices)

            if WOut is None:
                print("WARNING: Fit process for pixel {0} did not succeed".format(indices), file=sys.stderr)

            # store WOut
            if self._averageOutputWeights:
                if WOut is not None:
                    self._WOut += WOut / np.prod(self.inputShape)
            else:
                self._WOuts[stateIndex] = WOut

            # store x
            self._xs[stateIndex] = x

            nJobsDone += 1
            if verbose > 0:
                bar.update(nJobsDone)
            if verbose > 1:
                print(nJobsDone)

        if verbose > 0:
            bar.finish()

    try:
        _processPoolWorkerResults()
        pool.close()
    except BaseException:
        # Do not leave workers running when result collection is aborted.
        pool.terminate()
        raise
    finally:
        # Wait for the worker processes to exit so none are left behind.
        pool.join()
def __init__(self, inputShape, n_reservoir, filterSize=1, stride=1, borderMode="mirror", nWorkers="auto",
             spectralRadius=1.0, noiseLevel=0.0, inputScaling=None, leakingRate=1.0, reservoirDensity=0.2,
             randomSeed=None, averageOutputWeights=True, out_activation=lambda x: x,
             out_inverse_activation=lambda x: x, weightGeneration='naive', bias=1.0, outputBias=1.0,
             outputInputScaling=1.0, inputDensity=1.0, solver='pinv', regressionParameters=None,
             activation=B.tanh, activationDerivation=lambda x: 1.0 / B.cosh(x) ** 2):
    """Validate the configuration, allocate state and set up shared objects.

    Args:
        inputShape: sequence of axis sizes; its length becomes
            ``self.n_inputDimensions`` and its product sizes the state and
            weight arrays.
        n_reservoir: reservoir size, used for array shapes and forwarded to
            the base class.
        filterSize: must be odd; ``self._filterWidth`` is ``filterSize // 2``.
        stride: together with ``filterSize`` determines ``self._n_input``.
        borderMode: one of ``mirror``, ``padding``, ``edge``, ``wrap``.
        nWorkers: worker count, or ``"auto"`` for ``cpu_count() - 1`` with a
            minimum of 1.
        averageOutputWeights: when true a single ``self._WOut`` is kept,
            otherwise one weight entry per position in ``self._WOuts``.
        solver: stored on the instance; must be ``"lsqr"`` when
            ``averageOutputWeights`` is true.
        regressionParameters: stored on the instance; ``None`` (the default)
            yields a fresh empty dict per instance.

    All remaining keyword arguments are forwarded unchanged to the base
    class constructor.

    NOTE(review): with the declared defaults (``averageOutputWeights=True``,
    ``solver='pinv'``) the first check below raises, so callers have to
    override one of the two -- confirm this is intended.

    Raises:
        ValueError: for an inconsistent ``averageOutputWeights``/``solver``
            pair, an unknown ``borderMode`` or an even ``filterSize``.
    """
    self._averageOutputWeights = averageOutputWeights
    if averageOutputWeights and solver != "lsqr":
        raise ValueError(
            "`averageOutputWeights` can only be set to `True` when `solver` is set to `lsqr` (Ridge Regression)")

    self._borderMode = borderMode
    if borderMode not in ("mirror", "padding", "edge", "wrap"):
        raise ValueError(
            "`borderMode` must be set to one of the following values: `mirror`, `padding`, `edge` or `wrap`.")

    # A fresh dict per instance: a `{}` default would be shared by every
    # instance created without this argument.
    self._regressionParameters = {} if regressionParameters is None else regressionParameters
    self._solver = solver

    n_inputDimensions = len(inputShape)

    if filterSize % 2 == 0:
        raise ValueError("filterSize has to be an odd number (1, 3, 5, ...).")
    self._filterSize = filterSize
    self._filterWidth = int(np.floor(filterSize / 2))
    self._stride = stride

    self._n_input = int(np.power(np.ceil(filterSize / stride), n_inputDimensions))

    self.n_inputDimensions = n_inputDimensions
    self.inputShape = inputShape

    if not self._averageOutputWeights:
        self._WOuts = B.empty((np.prod(inputShape), 1, self._n_input + n_reservoir + 1))
        self._WOut = None
    else:
        self._WOuts = None
        self._WOut = B.zeros((1, self._n_input + n_reservoir + 1))
    self._xs = B.empty((np.prod(inputShape), n_reservoir, 1))

    if nWorkers == "auto":
        self._nWorkers = max(cpu_count() - 1, 1)
    else:
        self._nWorkers = nWorkers

    manager = Manager()
    self.sharedNamespace = manager.Namespace()
    # getattr() instead of a direct attribute read: the original condition
    # tested for `fitWorkerID` but then accessed `parallelWorkerIDs`, which
    # raises AttributeError when only the former exists.
    if not hasattr(self, "fitWorkerID") or getattr(self, "parallelWorkerIDs", None) is None:
        self.parallelWorkerIDs = manager.Queue()
        for i in range(self._nWorkers):
            self.parallelWorkerIDs.put((i))

    super(SpatioTemporalESN, self).__init__(n_input=self._n_input, n_reservoir=n_reservoir, n_output=1,
                                            spectralRadius=spectralRadius, noiseLevel=noiseLevel,
                                            inputScaling=inputScaling, leakingRate=leakingRate,
                                            reservoirDensity=reservoirDensity, randomSeed=randomSeed,
                                            out_activation=out_activation,
                                            out_inverse_activation=out_inverse_activation,
                                            weightGeneration=weightGeneration, bias=bias,
                                            outputBias=outputBias, outputInputScaling=outputInputScaling,
                                            inputDensity=inputDensity, activation=activation,
                                            activationDerivation=activationDerivation)
class Storage(object):
    '''
    Storage system

    Keeps data blocks in the memory-mapped file ``data.data``, hands out
    block addresses from a free-space list and serves queued read jobs
    through ``reader``.
    '''

    def __init__(self):
        '''
        Create the storage file if it is missing, map it into memory and
        initialise the bookkeeping structures.

        Raises OSError when the storage file cannot be opened.
        '''
        # The given page size
        self._PAGE_SIZE = 4096

        # The given size for data blocks
        self._BLOCK_SIZE = 1 * self._PAGE_SIZE

        # Meta data about datasets
        self._dataset_table = {}

        # Read/write head position
        self._position = 0

        # Manager for concurrency
        self.manager = Manager()

        # Job-queue for reading data: (address, data_id) entries
        self.job_queue = self.manager.list()

        # Data queues, keyed by the data_id generated in read_data
        self.data_queues = self.manager.dict()

        # Path to storage file
        _path = 'data.data'

        # Size of storage (Default 200 mb)
        self._SIZE = 4096 * 256 * 200

        # Amount of blocks
        self._BLOCKS = self._SIZE // self._BLOCK_SIZE

        # Check whether a storage file exists, else create one
        if not os.path.exists(_path):
            print('Writing storage file')
            # `with` closes the file; the original `f.close` lacked the
            # call parentheses and therefore never closed it explicitly.
            with open(_path, 'w+b') as f:
                f.write(b'?' * self._SIZE)

        # Open storage and create a MMAP
        try:
            storage = open(_path, 'a+b')
        except OSError:
            print('Cannot open storage file!')
            # Re-raise: continuing would only fail below with a NameError.
            raise

        # Create MMAP to file
        with storage:
            self.datamap = mmap.mmap(storage.fileno(), 0)

        # Free space vector: (start address, number of free blocks)
        self.free_space = [(0, self._BLOCKS)]

    def _write_data(self, address, data_block, flush=True):
        '''
        Writes a data block to the page at the given address

        ``data_block`` is encoded as UTF-8 before writing.  Failures are
        reported on stdout and not raised.  When ``flush`` is true the map
        is flushed afterwards.
        '''
        print('¤ Writing data block at ' + str(address))
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address

            # Write the block
            self.datamap.write(bytes(data_block, 'utf-8'))
        except Exception:
            print('! Could not write data block to ' + str(address) + '. Not enough space.')

        # Flush the written data to the file
        if flush:
            try:
                self.datamap.flush()
            except Exception:
                print("Cannot flush data with mmap!")

    def _read_block(self, address):
        '''
        Reads one page of data from the given address

        Returns the bytes read, or an empty string when the read fails
        (the failure is reported on stdout).
        '''
        print('+ Reading data from ' + str(address))
        data = ''
        try:
            # Go to the current address
            self.datamap.seek(address)
            self._position = address

            # Read the data
            data = self.datamap.read(self._PAGE_SIZE)
        except Exception:
            print('Could not read data block from ' + str(address))
        return data

    def _worst_fit(self, n_blocks):
        '''
        Data block allocation using worst-fit

        Takes ``n_blocks`` consecutive blocks from the start of the largest
        free segment and returns their addresses.
        '''
        # Get the largest free segment.  max() is required here: the
        # previous sorted(...)[0] picked the smallest segment instead.
        largest_segment = max(self.free_space, key=lambda segment: segment[1])
        start_address, blocks_amount = largest_segment
        assert blocks_amount >= n_blocks

        # Construct a list of free datablocks
        free_blocks = [start_address + index * self._BLOCK_SIZE
                       for index in range(n_blocks)]
        next_free_address = start_address + n_blocks * self._BLOCK_SIZE

        # Remove the free space and add the remaining
        # free space after allocation
        self.free_space.remove(largest_segment)
        self.free_space.append((next_free_address, blocks_amount - n_blocks))

        return free_blocks

    def _request_blocks(self, n_blocks):
        ''' Allocate ``n_blocks`` blocks with the worst-fit strategy '''
        return self._worst_fit(n_blocks)

    def get_size(self, dataset_id):
        ''' Get the amount of blocks in a dataset '''
        return self._dataset_table[dataset_id].size

    def append_data(self, dataset_id, data_block, address, flush=True):
        '''
        Append data to an existing dataset

        The block is only written while the dataset reports space left;
        ``address`` is returned in either case.
        '''
        # Check if there is any more allocated space
        # for the dataset
        if self._dataset_table[dataset_id].space_left():
            # Write data block and increment size
            self._write_data(address, data_block, flush)
            self._dataset_table[dataset_id].size += 1
        return address

    def add_dataset(self, dataset_id, dataset, size=None):
        '''
        Add a new dataset to the storage

        ``size`` is the number of blocks to allocate; it defaults to
        ``len(dataset)`` when omitted or zero.
        '''
        # Add metadata about the dataset
        current_size = size if size else len(dataset)
        self._dataset_table[dataset_id] = Dataset(current_size)

        requested_blocks = self._request_blocks(current_size)
        assert len(requested_blocks) >= len(dataset)

        # Write the data blocks to a file; flushing is deferred to the end
        for data_block, block_address in zip(dataset, requested_blocks):
            self.append_data(dataset_id, data_block, block_address, flush=False)
            self._dataset_table[dataset_id].append_block_index(block_address)

        try:
            self.datamap.flush()
        except Exception:
            print("Cannot flush data with mmap!")

    def read_data(self, dataset_id, data_queue):
        '''
        Queue one read job per block of the given dataset

        ``data_queue`` is registered under a random id and receives the
        blocks from ``reader``.  Returns the dataset's block addresses.
        '''
        # Generate a random id (6 characters)
        data_id = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(6))
        dataset = self._dataset_table[dataset_id]
        self.data_queues[data_id] = data_queue

        for address in dataset.datablocks:
            self.job_queue.append((address, data_id))

        return dataset.datablocks

    def reader(self):
        '''
        A reading process, which serves data blocks
        requests from read_data

        Runs forever; jobs are served in ascending address order starting
        from the current head position.
        '''
        while True:
            # Sort the list of jobs by their address
            jobs = sorted(self.job_queue, key=lambda job: job[0])
            try:
                # Find the job with the closest highest address
                (address, data_id) = next(job for job in jobs if job[0] >= self._position)

                # Read the data from disc
                data = self._read_block(address)

                # Serve data to the requesting process
                self.data_queues[data_id].put(data)

                # Remove the job from the list
                self.job_queue.remove((address, data_id))
            except Exception:
                # No jobs found. Start from position 0.
                # `except Exception` (not a bare except) so that
                # KeyboardInterrupt/SystemExit can still stop this loop.
                self._position = 0
                time.sleep(0.01)
def osint(self, organization, domain, files, ext, delete, scope_file, aws, aws_fixes, html,
          screenshots, graph, nuke, whoxy_limit):
    """
    The OSINT toolkit:\n
    This is ODIN's primary module. ODIN will take the tagret organization, domain, and other data
    provided and hunt for information. On the human side, ODIN looks for employee names,
    email addresses, and social media profiles. Names and emails are cross-referenced with
    HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

    ODIN also uses various tools and APIs to collect information on the provided IP addresses
    and domain names, including things like DNS and IP address history.

    View the README for the full detailsand lists of API keys!

    Note: If providing a scope file, acceptable IP addresses/ranges include:

        * Single Address:      8.8.8.8

        * Basic CIDR:          8.8.8.0/24

        * Nmap-friendly Range: 8.8.8.8-10

        * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    click.clear()
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    # Verbosity is fixed to "off" here; the value is still handed to the file hunter job.
    verbose = None
    if verbose:
        print(yellow("[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."))

    # Prepare the report directory and the database location
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    # Everything below only runs when this module is the entry point
    if __name__ != "__main__":
        return

    def run_phase(phase_jobs):
        # Start every process of the phase, then wait for all of them
        for job in phase_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in phase_jobs:
            job.join()

    # Manager-backed lists so the worker processes share the same data
    manager = Manager()
    ip_list = manager.list()
    domain_list = manager.list()

    # Build the reporter and derive the final scope from the scope file
    report = reporter.Reporter(report_path, output_report)
    report.create_tables()
    scope, ip_list, domain_list = report.prepare_scope(ip_list, domain_list, scope_file, domain)

    # Phase 1: initial discovery
    initial_jobs = [
        Process(name="Company Info Collector",
                target=report.create_company_info_table,
                args=(domain, )),
        Process(name="Employee Hunter",
                target=report.create_people_table,
                args=(domain_list, organization)),
        Process(name="Domain and IP Address Recon",
                target=report.create_domain_report_table,
                args=(organization, scope, ip_list, domain_list, whoxy_limit)),
    ]

    # Phase 2: jobs that consume the data gathered in phase 1
    followup_jobs = [
        Process(name="Shodan Queries",
                target=report.create_shodan_table,
                args=(ip_list, domain_list)),
        Process(name="Domain Squatting Recon",
                target=report.create_urlcrazy_table,
                args=(organization, domain)),
    ]

    # Phase 3: long-running jobs that use the progress bar, i.e. AWS enum
    slow_jobs = [
        Process(name="Cloud Recon",
                target=report.create_cloud_table,
                args=(organization, domain, aws, aws_fixes)),
    ]

    # Optional phase 2 jobs
    if screenshots:
        followup_jobs.append(
            Process(name="Screenshot Snapper",
                    target=report.capture_web_snapshots,
                    args=(report_path, )))
    if files:
        followup_jobs.append(
            Process(name="File Hunter",
                    target=report.create_foca_table,
                    args=(domain, ext, delete, report_path, verbose)))

    print(green("[+] Beginning initial discovery phase! This could take some time..."))
    run_phase(initial_jobs)

    print(green("[+] Initial discovery is complete! Proceeding with additional queries..."))
    run_phase(followup_jobs)

    print(green("[+] Final phase: checking the cloud and web services..."))
    run_phase(slow_jobs)

    report.close_out_reporting()
    print(green("[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report)))

    # Optional conversion of the SQLite results into a Neo4j graph
    if graph:
        graph_reporter = grapher.Grapher(output_report)
        print(green("[+] Loading ODIN database file {} for conversion to Neo4j").format(output_report))

        if not nuke:
            graph_reporter.convert()
        else:
            confirm = input(red("\n[!] You set the --nuke option. This wipes out all nodes \
for a fresh start. Proceed? (Y\\N) "))
            if confirm.lower() == "y":
                graph_reporter.clear_neo4j_database()
                print(green("[+] Database successfully wiped!\n"))
                graph_reporter.convert()
            else:
                print(red("[!] Then you can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options."))

    # Optional HTML report
    if html:
        print(green("\n[+] Creating the HTML report using {}.".format(output_report)))
        html_reporter = htmlreporter.HTMLReporter(
            organization, report_path + "/html_report/", output_report)
        html_reporter.generate_full_report()