def spin_crawl_threads(state, classifiers, MAX_BIT_SIZE, MAX_DL_THREADS, image_path):
    """Launch the three crawl worker processes and block until they exit.

    A manager provides the two bounded queues (locations: 16 slots,
    images: 64 slots) and the lock that are handed to the workers.
    An ``atexit`` hook terminates every still-active child process when
    the interpreter shuts down.
    """
    print("Running threads...")

    shared = Manager()
    location_q = shared.Queue(maxsize=16)
    image_q = shared.Queue(maxsize=64)
    state_lock = shared.Lock()

    location_worker = Process(
        name="generate_location",
        target=generate_location_thread,
        args=(location_q, MAX_BIT_SIZE),
    )
    classify_worker = Process(
        name="classification",
        target=classification_thread,
        args=(image_q, classifiers, image_path, state, state_lock),
    )
    download_worker = Process(
        name="download_image",
        target=download_image_thread,
        args=(location_q, image_q, MAX_DL_THREADS),
    )

    # Workers are started and later joined in this same order.
    pipeline = (download_worker, classify_worker, location_worker)
    for worker in pipeline:
        worker.start()

    def kill_threads():
        for child in active_children():
            child.terminate()

    atexit.register(kill_threads)

    for worker in pipeline:
        worker.join()
def fit(self, X, Y):
    """Train one binary classifier per label, spread over ``self.n_jobs`` processes.

    The distinct labels found in ``Y`` are split into ``self.n_jobs``
    groups; each group is handled by its own process, which pushes its
    results onto a shared output queue. One queue item is read per label
    and the collected items are stored as ``self.classifiers``.
    """
    assert not self.fit_done
    assert len(X) == len(Y)

    possible_labels = list({label for labels in Y for label in labels})
    job_labels = np.array_split(possible_labels, self.n_jobs)

    with Manager() as manager:
        X_proxy = manager.list(X)
        Y_proxy = manager.list(Y)
        output_queue = Queue()

        processes = []
        for job in job_labels:
            tasks = [
                {'X': X_proxy, 'Y': Y_proxy, 'label': lbl, 'return_label': True}
                for lbl in job
            ]
            processes.append(
                Process(target=sequential_execute,
                        args=(output_queue, get_binary_clf_from_multilabel, tasks)))

        for proc in processes:
            proc.start()

        # One result is expected on the queue for every label.
        # (original author's note: "needs to be flattened")
        results = [output_queue.get() for _ in possible_labels]

        for proc in processes:
            proc.join()

    self.classifiers = dict(results)
    self.fit_done = True
def _run_maker(job):
    """Unpack one ``(index, queue)`` pair and forward it to ``maker``."""
    index, queue = job
    return maker(index, queue)


def main():
    """Split the input file, compute every part in a pool, then dump the results.

    Each part gets its own single-slot manager queue; the worker for part
    ``i`` receives ``i`` and that queue. The queued results are collected
    into ``GUESS_PART``, sorted, written as one JSON file per month and
    finally handed to ``write_csv``.
    """
    file_to_attack = './data/example_files/S_hecht_submission_3.csv'
    n_parts = 13

    month_spliter(file_to_attack)

    manager = Manager()
    queue_list = [manager.Queue(1) for _ in range(n_parts)]

    with Pool(6) as p:
        print("HEY")
        # Pool.map takes ONE iterable; a third positional argument is the
        # chunk size on the standard-library Pool. Pair each index with its
        # queue and unpack inside a module-level helper so the call works
        # with any pool implementation.
        p.map(_run_maker, list(zip(range(n_parts), queue_list)))

    for queue in queue_list:
        GUESS_PART.append(queue.get())
    GUESS_PART.sort()

    for i in range(NB_MONTH):
        char = "guess_par_t" + str(i) + ".json"
        with open(char, "w") as jsdump:
            json.dump(GUESS_PART[i][1], jsdump, indent=4)

    write_csv(*(GUESS_PART[i][1] for i in range(n_parts)))
def add_progress_counter(self, init_mess="Beginning", end_mess="Done",
                         name_things='things', perc_interv=5):
    """Create a progress counter sized to ``self.data`` and register it on a manager.

    The counter is stored as ``self.PG`` and the manager as
    ``self.manager``.
    """
    counter_options = dict(
        init_mess=init_mess,
        end_mess=end_mess,
        nmb_max=len(self.data),
        name_things=name_things,
        perc_interv=perc_interv,
    )
    self.PG = ProgressCounter(**counter_options)

    self.manager = Manager()
    # NOTE(review): register() is invoked on an already-created manager and
    # receives the counter instance as its second argument - confirm this
    # is the intended way to share the counter.
    self.manager.register("PG", self.PG)
def ospf_check():
    """Run the OSPF check for every device listed in ``devicesFile``.

    The first comma-separated field of each line in ``devicesFile`` is
    taken as the device. The devices are mapped over a pool of
    ``processor`` workers, each call receiving a shared manager lock as
    its first argument.
    """
    clear_log()

    # Read the device list inside a context manager so the handle is closed.
    with open(devicesFile) as handle:
        devices = [line.split(',')[0] for line in handle]

    pool = Pool(processor)
    lock = Manager().Lock()
    list(pool.map(partial(_inf_ospf_check, lock), devices))
    pool.close()
    pool.join()
def svlan_check():
    """Collect and record SVLAN data for every ``Olt`` node in the graph.

    Each node is reduced to an ``(ip, company, area)`` tuple; the tuples
    are mapped over a 16-worker pool through ``get_svlan`` followed by
    ``svlan_entry``, which receives a shared manager lock.
    """
    clear_log()

    olts = [(node['ip'], node['company'], node['area'])
            for node in graph.find('Olt')]

    workers = Pool(16)
    shared_lock = Manager().Lock()
    record = partial(svlan_entry, shared_lock)
    pipeline = compose(record, get_svlan)
    list(workers.map(pipeline, olts))
    workers.close()
    workers.join()
def interface_check_m():
    """Fetch and output interface data for every ``Switch`` node in the graph.

    The ``(ip, model)`` pairs returned by the Cypher query are mapped
    over a 16-worker pool through ``get_interface`` followed by
    ``output_interface_m``, which receives a shared manager lock.
    """
    clear_log()

    cmd = "match(s: Switch) return s.ip, s.model"
    rows = graph.cypher.execute(cmd)
    switchs = [(row[0], row[1]) for row in rows]

    workers = Pool(16)
    shared_lock = Manager().Lock()
    out_inf = partial(output_interface_m, shared_lock)
    pipeline = compose(out_inf, get_interface)
    list(workers.map(pipeline, switchs))
    workers.close()
    workers.join()
def zte_gpon_svlan_check():
    """Collect SVLAN data for every OLT that has a card named ``GTGO``.

    The query returns each OLT ip together with the collected card
    slots; every ``(ip, slots)`` pair is sent through ``zte_gpon_svlan``
    and then ``svlan_entry`` on an 8-worker pool.
    """
    clear_log()

    rows = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)"
    )
    olts = ((row[0], row[1]) for row in rows)

    # Adapter from an (ip, slots) pair to the keyword call.
    # NOTE(review): this lambda is shipped to pool workers - confirm the
    # Pool in use can serialise it.
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])

    workers = Pool(8)
    shared_lock = Manager().Lock()
    record = partial(svlan_entry, shared_lock)
    pipeline = compose(record, lzte_gpon_svlan)
    list(workers.map(pipeline, olts))
    workers.close()
    workers.join()
def run_parallel(self, test_suites, test_runner, result_type=None,
                 results_path=None):
    """Run every test suite in its own process and summarise the outcome.

    Each suite is tagged with a fresh UUID so that the result written to
    the shared dictionary by ``self.execute_test`` can be matched back to
    the suite that produced it.

    :param test_suites: iterable of suites, one process is spawned per item
    :param test_runner: runner object forwarded to ``self.execute_test``
    :param result_type: when not ``None``, a report of this type is generated
    :param results_path: destination path forwarded to the reporter
    :return: ``1`` if ``self.dump_results`` reports failures or errors,
        otherwise ``0``
    """
    unittest.installHandler()

    manager = Manager()
    results = manager.dict()
    start = time.time()

    test_mapping = {}
    processes = []
    for test_suite in test_suites:
        # Give each test suite an uuid so it can be
        # matched to the correct test result
        test_id = str(uuid.uuid4())
        test_mapping[test_id] = test_suite
        proc = Process(
            target=self.execute_test,
            args=(test_runner, test_id, test_suite, results))
        processes.append(proc)
        proc.start()

    for proc in processes:
        proc.join()

    finish = time.time()
    errors, failures, _ = self.dump_results(start, finish, results)

    if result_type is not None:
        all_results = []
        result_parser = None
        for test_id, result in list(results.items()):
            tests = test_mapping[test_id]
            result_parser = SummarizeResults(
                vars(result), tests, (finish - start))
            all_results += result_parser.gather_results()

        # Without any result there is no parser to hand to the reporter;
        # skip the report instead of failing on an unbound name.
        if result_parser is not None:
            reporter = Reporter(
                result_parser=result_parser, all_results=all_results)
            reporter.generate_report(
                result_type=result_type, path=results_path)

    return 1 if failures or errors else 0
def query(query_lst):
    """Search DBLP for every query string and return the enriched hits.

    All hits of all queries are gathered first; afterwards one process
    per hit normalises the author field, attaches the BibTeX entry and
    stores the hit's ``info`` mapping in a shared dictionary keyed by the
    hit's position. A plain ``dict`` copy of that dictionary is returned.

    Raises ``Error`` when the API answers with HTTP status 429.
    """
    manager = Manager()
    hits = manager.dict()

    results = []
    for term in query_lst:
        response = requests.get(
            'http://dblp.uni-trier.de/search/publ/api',
            params={'q': term, 'h': 100, 'format': 'json'})
        if response.status_code == 429:
            raise Error
        found = response.json()["result"]["hits"].get("hit")
        if found is not None:
            results.extend(found)

    def collect(shared, hit, position):
        if hit is None:
            return
        info = hit["info"]
        author_field = info.pop("authors")["author"]
        if isinstance(author_field, dict):
            # A single author arrives as one mapping instead of a list.
            info["authors"] = author_field["text"]
        else:
            info["authors"] = [fullname(entry["text"]) for entry in author_field]
        info["bibtex"] = get_bib(info["key"])
        shared[position] = info

    jobs = [
        Process(target=collect, args=(hits, hit, position))
        for position, hit in enumerate(results)
    ]
    for job in jobs:
        job.start()
    for job in jobs:
        job.join()

    return dict(hits)
def get_city_states(self):
    """
    Creates city states from start time to end time

    The time index is restricted to the configured business days and
    business hours, the per-slice arguments are stored in
    ``self.parallel_args``, and ``self.get_city_state`` is then mapped
    over every index position on a process pool.

    :param:
    :return: plain dict copy of the shared city-state dictionary
    """
    creator_cfg = self.config['city_state_creator']
    business_days = creator_cfg['business_days']
    business_hours_start = creator_cfg['business_hours_start']
    business_hours_end = creator_cfg['business_hours_end']

    # Time slice values between the start and end time
    index = pd.date_range(start=self.start_time,
                          end=self.end_time,
                          freq=str(self.time_unit_duration) + 'min')

    # Keep only the required days, then only the required hours
    index = index[index.day_name().isin(business_days)]
    within_hours = ((index.hour >= business_hours_start)
                    & (index.hour <= business_hours_end))
    index = index[within_hours]

    time_slice_starts = index - timedelta(minutes=self.time_slice_duration / 2)
    time_slice_ends = index + timedelta(minutes=self.time_slice_duration / 2)

    # Arguments dictionary used by the parallel workers
    self.parallel_args = self.create_parallel_args(index,
                                                   time_slice_starts,
                                                   time_slice_ends)

    # Shared container the workers write their city states into
    manager = Manager()
    city_states = manager.dict()
    N = len(index.values)

    self.logger.info("Creating parallelization pool")
    pool = ProcessPool(nodes=25)
    pool.map(self.get_city_state, ([city_states, t] for t in xrange(N)))
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished creating city states")

    return dict(city_states)
def hostname_check():
    """Collect the hostname of every ``Olt`` node and write it back to the graph.

    Phase one maps the ``(ip, company)`` pairs over a 16-worker pool
    through ``get_hostname`` followed by ``hostname_entry`` (which
    receives a shared manager lock). Phase two reads ``result_file`` line
    by line and sets ``n.hostname`` on the node whose ip matches the
    first comma-separated field.
    """
    clear_log()
    nodes = graph.find('Olt')
    olts = [(x['ip'], x['company']) for x in nodes]

    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()

    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    # Read the results inside a context manager so the handle is closed,
    # and use a plain loop since the query is run for its side effect.
    # NOTE(review): the last field of each line still carries the line
    # ending, exactly as before - confirm that is what the graph expects.
    with open(result_file) as handle:
        for line in handle:
            fields = line.split(',')
            graph.cypher.execute(cmd, ip=fields[0], hostname=fields[1])
def create_csv(self):
    """Write the two CSV headers, then parse every zip in a worker pool.

    Does nothing unless the module is being run as the main script.
    ``self.parse_Zip`` is called once per index of ``self.list_of_zips``
    with a shared manager lock as its first argument. The elapsed time is
    printed at the end.
    """
    if __name__ != '__main__':
        return

    started = time()

    # Context managers guarantee both files are closed even if a write fails.
    with open(self.out_csv1, "w") as levels_csv:
        levels_csv.write("id" + ',' + "level" + '\n')
    with open(self.out_csv2, "w") as objects_csv:
        objects_csv.write("id" + ',' + "object_name" + '\n')

    pool = Pool()
    manager = Manager()
    lock = manager.Lock()
    parse_one = partial(self.parse_Zip, lock)
    pool.map(parse_one, range(len(self.list_of_zips)))
    pool.close()
    pool.join()

    print('Create .csv files time = ' + str(time() - started) + 's')
def zhongji_check():
    """Collect trunk data for every ``Olt`` node and merge it into the graph.

    Phase one maps the ``(ip, company)`` pairs over a 16-worker pool
    through ``get_zhongji`` followed by ``zhongji_entry`` (which receives
    a shared manager lock). Phase two reads ``result_file`` line by line
    and merges an ``Etrunk`` node and a ``Port`` node from the first
    three comma-separated fields.
    """
    clear_log()
    nodes = graph.find('Olt')
    olts = [(x['ip'], x['company']) for x in nodes]

    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()

    cmd = """match(n: Olt) where n.ip = {ip} merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) merge(m) - [:Include]->(p: Port{name: {interface}})"""
    # Read the results inside a context manager so the handle is closed,
    # and use a plain loop since the query is run for its side effect.
    with open(result_file) as handle:
        for line in handle:
            fields = line.split(',')
            graph.cypher.execute(
                cmd, ip=fields[0], sm=fields[1], interface=fields[2])
def run_post_process():
    """Process unknown records in parallel, one worker per record generator.

    A manager lock and a manager dictionary (``time`` and ``id`` entries)
    are shared by all generators. Every generator is created before the
    first worker process is started; the call returns once all workers
    have finished.
    """
    es = ES(FLAGS.configfile_name)
    manager = Manager()
    lock = manager.Lock()
    shared_dict = manager.dict({'time': 0, "id": ""})

    # Leave two cores free, but always run at least one worker: on hosts
    # with one or two cores the plain subtraction gives zero or less and
    # nothing would be processed.
    process_num = max(1, int(cpu_count() - 2))

    generator_list = [
        _generator(lock, shared_dict, es) for _ in range(process_num)
    ]

    workers = []
    for record_generator in generator_list:
        worker = Process(target=_process_unknown_record,
                         args=(record_generator,))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
def download_image_thread(location_q, image_q, MAX_DL_THREADS=10):
    """Forever take locations from ``location_q`` and download them in a pool.

    Every downloaded image is put on ``image_q`` as a
    ``(location, image)`` tuple. A semaphore with twice as many slots as
    pool workers limits how many downloads are running or scheduled at
    once. This function never returns.

    :param location_q: queue the locations are read from (blocking get)
    :param image_q: queue the results are written to (blocking put)
    :param MAX_DL_THREADS: size of the download pool
    """
    print("Running Download Image Thread.")
    max_processes = MAX_DL_THREADS
    print("Creating a thread pool of size {} for downloading images...".format(max_processes))
    pool = Pool(processes=max_processes)

    # Allow us to have n processes running, and n processes scheduled to run
    # TODO: Manager is not necessary here, but is used to get around the fact
    # that thread-safe objects cannot be passed by reference, they must be
    # inherited. A more lightweight solution should be found
    workers = Manager().Semaphore(max_processes*2)

    def async_download(location):
        # Release the slot even when the download or the put fails;
        # otherwise every failure permanently consumes a slot and the
        # dispatch loop below eventually blocks on acquire() forever.
        try:
            image = download_image(location)
            image_q.put((location, image), True)
        finally:
            workers.release()

    while True:
        location = location_q.get(True)
        workers.acquire()
        pool.apply_async(async_download, (location,))
def folderbase_cut_silence(input_folder, cut_interval):
    """Remove silence from every ``.wav`` file found under ``input_folder``.

    Results are written to ``<input_folder>/remove_silence`` (created if
    missing) and each successfully processed source file is deleted. The
    module-level counters ``cut_silence_file_num``,
    ``cut_silence_out_file_num`` and ``cut_silence_fail_file`` are updated
    and a summary is logged at the end.

    :param input_folder: folder that is walked for input files
    :param cut_interval: forwarded to ``cut_wav_without_silence``
    """
    output_no_silence = os.path.join(input_folder, "remove_silence")
    if not os.path.exists(output_no_silence):
        os.mkdir(output_no_silence)

    # NOTE(review): os.walk descends into sub-folders, but only the bare
    # file name is kept and later joined with input_folder - confirm that
    # nested files are not expected here.
    wav_files = [
        filename
        for _root, _dirs, files in os.walk(input_folder)
        for filename in files
    ]

    def process_files(lock, file):
        try:
            # exclude log.txt file; raw string so that "\." is a regex
            # escape rather than an invalid string escape
            if re.search(r".+\.wav", file):
                wave_file = os.path.join(input_folder, file)
                wo_num = cut_wav_without_silence(wave_file, output_no_silence, cut_interval)
                with cut_silence_file_num.get_lock():
                    cut_silence_file_num.value += 1
                with cut_silence_out_file_num.get_lock():
                    cut_silence_out_file_num.value += wo_num
                os.remove(wave_file)
        except Exception as e:
            logging.info(e)
            with cut_silence_fail_file.get_lock():
                cut_silence_fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    # One lock entry per file so both sequences have the same length.
    locks = [lock] * len(wav_files)
    pool.map(process_files, locks, wav_files)

    loginfo = '''Total number of audio files processed is {}, generated {} files and {} files failed '''.format(
        cut_silence_file_num.value,
        cut_silence_out_file_num.value,
        cut_silence_fail_file.value)
    logging.info(loginfo)
def __init__(self, initial_patterns_list, input_text_file,
             data_source_type, read_file_format='rb'):
    """Index the initial patterns in a shared dict and open the data source.

    Every initial pattern is wrapped in ``PatternData`` and stored in a
    manager-backed dictionary under its ``formatted`` key. The smallest
    and largest word count among those keys are recorded as
    ``min_pattern_len`` and ``max_pattern_len``.
    """
    self.read_format = read_file_format
    self.pattern_to_data = {}

    by_formatted = {}
    for wrapped in [PatternData(raw) for raw in initial_patterns_list]:
        by_formatted[wrapped.formatted] = wrapped
    self.pattern_to_data = Manager().dict(by_formatted)

    word_counts = [len(key.split()) for key in self.pattern_to_data.keys()]
    self.min_pattern_len = min(word_counts)
    self.max_pattern_len = max(word_counts)

    self.data_wrapper = data_wrapper_factory(input_text_file,
                                             data_source_type)
def add_power_info():
    """Collect power information for every switch whose SNMP state is normal.

    The reader is chosen per switch model through ``_model``; its output
    is passed to ``_add_power_info`` together with a shared manager lock.
    The work is mapped over a pool of ``processor`` workers.
    """
    funcs = dict(
        S8508=S85.get_power_info,
        S8505=S85.get_power_info,
        T64G=T64.get_power_info,
        S8905=S89.get_power_info,
        S8905E=S8905E.get_power_info,
        S9306=S93.get_power_info,
        S9303=S93.get_power_info,
    )
    get_power_info = partial(_model, funcs)

    rows = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model"
    )
    switches = [{'ip': row['ip'], 'model': row['model']} for row in rows]

    workers = Pool(processor)
    shared_lock = Manager().Lock()
    store = partial(_add_power_info, shared_lock)
    pipeline = compose(store, get_power_info)
    list(workers.map(pipeline, switches))
    workers.close()
    workers.join()
def sample(self, n_samples: int, beta: float = 1.):
    """Run ``n_samples`` sampling rounds across all chains via pool workers.

    Each chain gets a single-slot work queue and a single-slot return
    queue. Per round, one work item is sent to every chain, one reply is
    read from every chain, the samples are swapped and the betas are
    adjusted. Afterwards a stop item is sent to every chain, the final
    sampler objects are collected into ``self.samplers`` and the pool is
    closed.

    NOTE(review): the ``beta`` argument is never read; the name is
    rebound by the loops below.
    """
    with Manager() as mgr:
        queues_work = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]
        queues_return = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]

        # Hand every worker its queue pair, chain index and sampler.
        _ = self.parallel_pool.starmap(
            func=worker_init,
            iterable=[(queues_work[idx], queues_return[idx], idx,
                       self.samplers[idx])
                      for idx in range(self.num_chains)])

        # Start one run task per chain; the async results are collected
        # only after the stop items have been sent.
        worker_results = [self.parallel_pool.apply_async(func=worker_run)
                          for _ in range(self.num_chains)]

        swapped = [None for _ in self.samplers]
        last_samples = [None for _ in self.samplers]
        for i_sample in tqdm(range(int(n_samples))):
            logger.debug('MAIN PROCESS: deploying work...')
            for idx, beta in enumerate(self.betas):
                # Work item layout: (chain index, swapped sample, beta, stop flag)
                queues_work[idx].put((idx, copy.deepcopy(swapped[idx]), beta, False))  # sample
            logger.debug('MAIN PROCESS: waiting for return...')
            for idx in range(len(self.samplers)):
                # The reply carries its own chain index, which decides
                # where the sample is stored.
                idx, last_sample, beta = queues_return[idx].get()  # get sample
                last_samples[idx] = last_sample
            logger.debug('MAIN PROCESS: swapping samples...')
            swapped = self.swap_samples(last_samples)  # swap samples
            logger.debug('MAIN PROCESS: swapping samples...')
            self.adjust_betas(i_sample, swapped, last_samples)  # adjust temps

        # logger.debug('stopping workers...')
        # A work item whose last element is True tells the worker to stop.
        _ = [queues_work[idx].put((idx, None, 0.00, True)) for idx in range(self.num_chains)]
        _ = [queues_work[idx].join() for idx in range(self.num_chains)]

        # logger.debug('reached getting from finalqueue')
        for worker_result in worker_results:
            idx, sampler_obj = worker_result.get()
            logger.debug(f'GATHERED sampler {idx} trace_x: {len(sampler_obj.trace_x)}')
            self.samplers[idx] = sampler_obj

        self.parallel_pool.close()
        self.parallel_pool.join()
def add_traffics():
    """Collect traffic figures for every switch whose SNMP state is normal.

    The query returns each switch ip, its model and the collected
    interface names. The reader is chosen per switch model through
    ``_model``; its output is passed to ``_add_traffics`` together with a
    shared manager lock. The work is mapped over a pool of ``processor``
    workers.
    """
    funcs = dict(
        S8508=S85.get_traffics,
        S8505=S85.get_traffics,
        T64G=T64.get_traffics,
        S8905=S89.get_traffics,
        S8905E=S8905E.get_traffics,
        S9306=S93.get_traffics,
        S9303=S93.get_traffics,
    )
    get_traffics = partial(_model, funcs)

    rows = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model"
    )
    switchs = [
        {'ip': row['ip'], 'infs': row['infs'], 'model': row['model']}
        for row in rows
    ]

    workers = Pool(processor)
    shared_lock = Manager().Lock()
    store = partial(_add_traffics, shared_lock)
    pipeline = compose(store, get_traffics)
    list(workers.map(pipeline, switchs))
    workers.close()
    workers.join()
def folderbase_convert_to_wave(webmfolder, wavefolder):
    """Convert every non-wav file yielded by ``mp3gen(webmfolder)`` to wave.

    Successfully converted sources are deleted; sources whose conversion
    raises are moved to ``data/convert_failed``. The module-level counters
    ``total_num``, ``success_num`` and ``fail_num`` are updated and a
    summary is logged at the end.

    :param webmfolder: folder passed to ``mp3gen`` to list the input files
    :param wavefolder: output folder forwarded to ``convert_to_wav``
    """
    def process_convert(lock, filename):
        # Worker for a single file; the ``lock`` argument is not used here.
        my_logger.debug("filename is {}".format(filename))
        with total_num.get_lock():
            total_num.value += 1
        try:
            success = convert_to_wav(filename, wavefolder)
            with success_num.get_lock():
                success_num.value += success
            os.remove(filename)
        except Exception as e:
            # Log a tab-separated line: timestamp, file name, error text.
            line = "\t".join([str(datetime.datetime.now()), filename, str(e)])
            my_logger.info(line)
            # Move the offending source out of the way.
            fail_folder = "data/convert_failed"
            if not os.path.exists(fail_folder):
                os.mkdir(fail_folder)
            filebase = os.path.basename(filename)
            failed_file = os.path.join(fail_folder, filebase)
            os.rename(filename, failed_file)
            with fail_num.get_lock():
                fail_num.value += 1
        return 1

    # Skip every path that contains "wav".
    filenames = []
    for file in mp3gen(webmfolder):
        if re.search("wav", file):
            continue
        filenames.append(file)

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    # One lock entry per file so both sequences have the same length.
    # NOTE(review): two iterables are passed to pool.map - presumably the
    # Pool in use accepts several; confirm against the import.
    locks = [lock] * len(filenames)
    pool.map(process_convert, locks, filenames)
    my_logger.info(
        "{}/{} files successfully converted to wave and {} files failed".
        format(success_num.value, total_num.value, fail_num.value))
def folderbase_cut_interval(input_folder, output_folder, cut_period):
    """Cut every ``.wav`` file under ``input_folder`` into fixed-length pieces.

    Each piece is written to ``output_folder`` (created if missing) as
    ``<original name>_<index>.wav`` and the source file is deleted once
    all of its pieces are written. The module-level counters
    ``file_num``, ``out_file_num`` and ``fail_file`` are updated and a
    summary is logged at the end.

    :param input_folder: folder that is walked recursively for input files
    :param output_folder: destination folder for the pieces
    :param cut_period: length of one piece, in the unit ``cut_wave`` expects
    """
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    wav_files = [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(input_folder)
        for filename in files
    ]

    def process_files(lock, file):
        try:
            # Raw string so that "\." is a regex escape rather than an
            # invalid string escape.
            if re.search(r".+\.wav", file):
                with file_num.get_lock():
                    file_num.value += 1
                filebasename, _ = os.path.splitext(os.path.basename(file))

                # Only the frame count and the frame rate are needed to
                # work out the duration of the source.
                with wave.open(file, mode='rb') as source_audio:
                    n_frames = source_audio.getnframes()
                    frame_rate = source_audio.getframerate()
                audio_duration = n_frames / frame_rate

                cut_start = 0
                index = 0
                while cut_start < audio_duration:
                    cut_end = cut_start + cut_period
                    cut_audio, cutaudio_prop = cut_wave(file,
                                                        cut_start,
                                                        cut_end,
                                                        start_bias=0,
                                                        end_bias=0)
                    newfile = os.path.join(
                        output_folder,
                        filebasename + "_" + str(index) + ".wav")
                    index += 1
                    with wave.open(newfile, "wb") as new_audio:
                        new_audio.setparams((cutaudio_prop["nchannels"],
                                             cutaudio_prop["sampwidth"],
                                             cutaudio_prop["framerate"],
                                             cutaudio_prop["nframes"],
                                             cutaudio_prop["comptype"],
                                             cutaudio_prop["compname"]))
                        new_audio.writeframes(cut_audio)
                    cut_start = cut_end
                    with out_file_num.get_lock():
                        out_file_num.value += 1
                os.remove(file)
        except Exception as e:
            logging.info(e)
            with fail_file.get_lock():
                fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    # One lock entry per file so both sequences have the same length.
    locks = [lock] * len(wav_files)
    pool.map(process_files, locks, wav_files)

    loginfo = '''Total number of audio files processed is {}, generated {} files and {} files failed '''.format(
        file_num.value, out_file_num.value, fail_file.value)
    logging.info(loginfo)
# Settings: location, units and the optional rapidfire switch
latitude = check_env_var("HZN_LAT", printerr=True)
longitude = check_env_var("HZN_LON", printerr=True)
# weewx recommends only using 'us'
pws_units = check_env_var("PWS_UNITS", default='us', printerr=True)
pws_wu_loc = check_env_var("PWS_WU_LOC", default='', printerr=True)
pws_wu_rapidfire = check_env_var("PWS_WU_RPDF", default='False', printerr=True)

# Horizon may deliver a lower-case boolean; anything other than
# "true"/"True" is treated as "False".
pws_wu_rapidfire = "True" if pws_wu_rapidfire in ("true", "True") else "False"

## Shared data structure (dict for flask server to read & serve)
manager = Manager()
sdata = manager.dict()
standard_params = [
    "wu_id", "stationtype", "model", "latitude", "longitude", "units",
    "location",
]
standard_values = [
    pws_wu_id, pws_station_type, pws_model, latitude, longitude, pws_units,
    pws_wu_loc,
]
sdata["r"] = {"status": "Station initializing..."}
sdata["t"] = str(int(time.time()))  # Timestamp
sdata["i"] = dict(zip(standard_params, standard_values))  # Station Info

## Flask HTTPserver ----------------------------------------------------------
## Serve the shared data dict from a simple flask server on 0.0.0.0:8357
p_flask = Process(target=fl.run_server, args=('0.0.0.0', 8357, sdata))
p_flask.start()

## Weewx service -------------------------------------------------------------
# Modify the weewx configuration file with our env var settings
weemod = weewx_mod(weewx_config_file, pws_station_type)
def predict(self, inputData, transientTime=0, update_processor=lambda x: x, verbose=0):
    """Predict an output for ``inputData`` using a pool of worker processes.

    One job is created per grid position of the non-temporal input axes.
    Workers push ``(indices, prediction, state)`` tuples onto a manager
    queue; this process drains the queue and fills the output array and
    the stored states.

    :param inputData: array with ``self.n_inputDimensions`` spatial
        dimensions plus one leading temporal dimension
    :param transientTime: number of leading time steps removed from the
        length of the output
    :param update_processor: not used in this method
    :param verbose: ``> 0`` shows a progress bar, ``> 1`` also prints the
        running job count
    :return: the prediction array
    :raises ValueError: if ``inputData`` has the wrong number of dimensions
    """
    rank = len(inputData.shape) - 1

    if rank != self.n_inputDimensions:
        raise ValueError(
            "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                self.n_inputDimensions))

    manager = Manager()
    predictQueue = manager.Queue()

    # workaround as predict does not support batches atm
    # add dummy dimension to let embedInputData work properly (is optimized to work for batches)
    inputData = inputData.reshape(1, *inputData.shape)
    modifiedInputData = self._embedInputData(inputData)
    # Drop the dummy batch dimension again.
    modifiedInputData = modifiedInputData[0]
    inputData = inputData[0]

    self.transientTime = transientTime
    self.sharedNamespace.transientTime = transientTime

    # Output shape: (time steps minus transient, *self.inputShape)
    predictionOutput = B.zeros(np.insert(self.inputShape, 0, inputData.shape[0] - transientTime))

    # One job per grid position; every coordinate is offset by the filter width.
    jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[1:]]),
                    axis=rank).reshape(-1, rank).tolist()
    nJobs = len(jobs)

    self.resetState()

    iterator = PredictionArrayIterator(modifiedInputData, jobs, self._filterWidth, self._stride, self)

    pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_predictProcess,
                initargs=[predictQueue, self])
    # The returned async result is discarded; results are read from
    # predictQueue below.
    pool.map_async(self._predictProcess, iterator, chunksize=200)#, chunksize=1)

    def _processPoolWorkerResults():
        # Drain the queue until one result per job has been received.
        nJobsDone = 0

        if verbose > 0:
            bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
            bar.update(0)

        while nJobsDone < nJobs:
            data = predictQueue.get()

            # result of predicting
            indices, prediction, state = data
            id = self._uniqueIDFromIndices(indices)
            self._xs[id] = state

            # update the values
            predictionOutput[tuple([Ellipsis] + indices)] = prediction

            nJobsDone += 1
            if verbose > 0:
                bar.update(nJobsDone)
                if verbose > 1:
                    print(nJobsDone)

        if verbose > 0:
            bar.finish()

    _processPoolWorkerResults()

    pool.close()

    return predictionOutput
def fit(self, inputData, outputData, transientTime=0, verbose=0):
    """Fit the output weights for every grid position using a worker pool.

    One job is created per grid position of the spatial input axes.
    Workers push ``(indices, x, WOut)`` tuples onto a manager queue; this
    process drains the queue and stores the states and output weights.

    :param inputData: either a single time series (temporal dimension plus
        ``self.n_inputDimensions`` spatial dimensions) or a batch of them
        with one extra leading dimension
    :param outputData: target data, reshaped the same way as ``inputData``
    :param transientTime: subtracted from the time-axis length to obtain
        ``partialLength``
    :param verbose: ``> 0`` shows a progress bar, ``> 1`` also prints the
        running job count
    :raises ValueError: if ``inputData`` has the wrong number of dimensions
    """
    rank = len(inputData.shape) - 1

    if rank != self.n_inputDimensions and rank != self.n_inputDimensions + 1:
        raise ValueError(
            "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                self.n_inputDimensions))

    # reshape the input so that it has the shape (timeseries, time, input_dimension^n)
    if rank == self.n_inputDimensions:
        inputData = inputData.reshape(1, *inputData.shape)
        outputData = outputData.reshape(1, *outputData.shape)
    else:
        # modify rank again
        rank -= 1

    partialLength = (inputData.shape[1] - transientTime)
    totalLength = inputData.shape[0] * partialLength
    timeseriesCount = inputData.shape[0]

    manager = Manager()
    fitQueue = manager.Queue()

    modifiedInputData = self._embedInputData(inputData)

    # Publish the length information through the shared namespace.
    self.sharedNamespace.transientTime = transientTime
    self.sharedNamespace.partialLength = partialLength
    self.sharedNamespace.totalLength = totalLength
    self.sharedNamespace.timeseriesCount = timeseriesCount

    # One job per grid position; every coordinate is offset by the filter width.
    jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[2:]]),
                    axis=rank).reshape(-1, rank).tolist()
    nJobs = len(jobs)

    self.resetState()

    iterator = FittingArrayIterator(modifiedInputData, outputData, jobs, self._filterWidth, self._stride, self)

    pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_fitProcess,
                initargs=[fitQueue, self])
    # The returned async result is discarded; results are read from
    # fitQueue below.
    pool.map_async(self._fitProcess, iterator, chunksize=16)

    def _processPoolWorkerResults():
        # Drain the queue until one result per job has been received.
        nJobsDone = 0

        if verbose > 0:
            bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
            bar.update(0)

        while nJobsDone < nJobs:
            data = fitQueue.get()

            # result of fitting
            indices, x, WOut = data
            id = self._uniqueIDFromIndices(indices)

            if WOut is None:
                import sys
                print("WARNING: Fit process for pixel {0} did not succeed".format(indices), file=sys.stderr)

            # store WOut
            if self._averageOutputWeights:
                # Accumulate the mean over all grid positions; failed
                # fits (WOut is None) contribute nothing.
                if WOut is not None:
                    self._WOut += WOut / np.prod(self.inputShape)
            else:
                self._WOuts[id] = WOut

            # store x
            self._xs[id] = x

            nJobsDone += 1
            if verbose > 0:
                bar.update(nJobsDone)
                if verbose > 1:
                    print(nJobsDone)

        if verbose > 0:
            bar.finish()

    _processPoolWorkerResults()

    pool.close()
def __init__(self, inputShape, n_reservoir, filterSize=1, stride=1, borderMode="mirror", nWorkers="auto",
             spectralRadius=1.0, noiseLevel=0.0, inputScaling=None, leakingRate=1.0, reservoirDensity=0.2,
             randomSeed=None, averageOutputWeights=True, out_activation=lambda x: x,
             out_inverse_activation=lambda x: x, weightGeneration='naive', bias=1.0, outputBias=1.0,
             outputInputScaling=1.0, inputDensity=1.0, solver='pinv', regressionParameters={},
             activation=B.tanh, activationDerivation=lambda x: 1.0 / B.cosh(x) ** 2):
    # Set up the spatio-temporal ESN: validate the options, derive the
    # filter geometry, allocate the weight/state storage, create the
    # manager-backed shared objects and finally initialise the base class.
    #
    # Raises ValueError when averageOutputWeights is set with a solver
    # other than "lsqr", when borderMode is not one of the four accepted
    # values, or when filterSize is even.
    #
    # NOTE(review): the declared defaults (averageOutputWeights=True,
    # solver='pinv') fail the first check below - confirm the intended
    # defaults.
    # NOTE(review): regressionParameters defaults to a mutable dict that
    # is stored on the instance as-is.
    self._averageOutputWeights = averageOutputWeights
    if averageOutputWeights and solver != "lsqr":
        raise ValueError(
            "`averageOutputWeights` can only be set to `True` when `solver` is set to `lsqr` (Ridge Regression)")

    self._borderMode = borderMode
    if not borderMode in ["mirror", "padding", "edge", "wrap"]:
        raise ValueError(
            "`borderMode` must be set to one of the following values: `mirror`, `padding`, `edge` or `wrap`.")

    self._regressionParameters = regressionParameters
    self._solver = solver

    n_inputDimensions = len(inputShape)

    if filterSize % 2 == 0:
        raise ValueError("filterSize has to be an odd number (1, 3, 5, ...).")
    self._filterSize = filterSize
    # Half the filter size, rounded down.
    self._filterWidth = int(np.floor(filterSize / 2))
    self._stride = stride

    # ceil(filterSize / stride) raised to the number of input dimensions
    self._n_input = int(np.power(np.ceil(filterSize / stride), n_inputDimensions))

    self.n_inputDimensions = n_inputDimensions
    self.inputShape = inputShape

    # Either one weight row per grid position, or a single shared row.
    if not self._averageOutputWeights:
        self._WOuts = B.empty((np.prod(inputShape), 1, self._n_input + n_reservoir + 1))
        self._WOut = None
    else:
        self._WOuts = None
        self._WOut = B.zeros((1, self._n_input + n_reservoir + 1))
    self._xs = B.empty((np.prod(inputShape), n_reservoir, 1))

    # "auto" uses all but one CPU, with a minimum of one worker.
    if nWorkers == "auto":
        self._nWorkers = np.max((cpu_count() - 1, 1))
    else:
        self._nWorkers = nWorkers

    manager = Manager()
    self.sharedNamespace = manager.Namespace()
    if hasattr(self, "fitWorkerID") == False or self.parallelWorkerIDs is None:
        # Queue holding the worker ids 0 .. nWorkers-1
        self.parallelWorkerIDs = manager.Queue()
        for i in range(self._nWorkers):
            self.parallelWorkerIDs.put((i))

    super(SpatioTemporalESN, self).__init__(n_input=self._n_input, n_reservoir=n_reservoir, n_output=1,
                                            spectralRadius=spectralRadius,
                                            noiseLevel=noiseLevel, inputScaling=inputScaling,
                                            leakingRate=leakingRate, reservoirDensity=reservoirDensity,
                                            randomSeed=randomSeed, out_activation=out_activation,
                                            out_inverse_activation=out_inverse_activation,
                                            weightGeneration=weightGeneration, bias=bias, outputBias=outputBias,
                                            outputInputScaling=outputInputScaling,
                                            inputDensity=inputDensity, activation=activation,
                                            activationDerivation=activationDerivation)

    """
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, html, screenshots, graph, nuke, whoxy_limit):
    """
    The OSINT toolkit:\n
    This is ODIN's primary module. ODIN will take the tagret organization, domain, and other data
    provided and hunt for information. On the human side, ODIN looks for employee names,
    email addresses, and social media profiles. Names and emails are cross-referenced with
    HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

    ODIN also uses various tools and APIs to collect information on the provided IP addresses
    and domain names, including things like DNS and IP address history.

    View the README for the full detailsand lists of API keys!

    Note: If providing a scope file, acceptable IP addresses/ranges include:

        * Single Address: 8.8.8.8

        * Basic CIDR: 8.8.8.0/24

        * Nmap-friendly Range: 8.8.8.8-10

        * Underscores? OK: 8.8.8.8_8.8.8.10
    """
    # Flow: prepare the report location, build the scope, then run three
    # groups of worker processes one group after another (each group is
    # started together and fully joined before the next one begins).
    # Afterwards the SQLite report is closed and, if requested, converted
    # to Neo4j and/or rendered as HTML.
    click.clear()
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    # verbose is fixed to None here, so the message below is never printed
    # and None is what gets forwarded to the file hunter job.
    verbose = None

    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(report_path, output_report)
        report.create_tables()
        # prepare_scope returns the lists that are used from here on.
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)

        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(organization, scope, ip_list,
                                      domain_list, whoxy_limit))
        jobs.append(domain_report)

        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)

        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)

        # Optional jobs, both added to the second group.
        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)

        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path,
                                         verbose))
            more_jobs.append(files_report)

        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()

        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()

        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()

        report.close_out_reporting()
        print(
            green(
                "[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report)))

        if graph:
            graph_reporter = grapher.Grapher(output_report)
            print(
                green(
                    "[+] Loading ODIN database file {} for conversion to Neo4j"
                ).format(output_report))

            if nuke:
                # Wiping the graph database requires an explicit "y".
                confirm = input(
                    red("\n[!] You set the --nuke option. This wipes out all nodes \
for a fresh start. Proceed? (Y\\N) "))
                if confirm.lower() == "y":
                    graph_reporter.clear_neo4j_database()
                    print(green("[+] Database successfully wiped!\n"))
                    graph_reporter.convert()
                else:
                    # Declining the wipe also skips the conversion.
                    print(
                        red("[!] Then you can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options."))
            else:
                graph_reporter.convert()

        if html:
            print(
                green("\n[+] Creating the HTML report using {}.".format(
                    output_report)))
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, verbose, html, screenshots):
    """
    The OSINT toolkit:\n
    This is ODIN's primary module. ODIN will take the target organization, domain, and other data
    provided and hunt for information. On the human side, ODIN looks for employee names,
    email addresses, and social media profiles. Names and emails are cross-referenced with
    HaveIBeenPwned, Twitter's API, and search engines to collect additional information.\n
    ODIN also uses various tools and APIs to collect information on the provided IP addresses
    and domain names, including things like DNS and IP address history.\n
    View the README for the full details and lists of API keys!
    """
    # NOTE(review): the docstring above reads like user-facing help text
    # (explicit "\n" paragraph breaks), so parameter notes are kept here
    # as comments instead of being added to it -- confirm with the caller.
    #
    # self         -- not used by this function body.
    # organization -- names the report directory ("reports/<organization>/")
    #                 and is forwarded to several report jobs.
    # domain       -- forwarded to the scope preparation and report jobs.
    # files        -- when truthy, the "File Hunter" job is added to phase 1.
    # ext, delete  -- forwarded untouched to report.create_foca_table.
    # scope_file   -- forwarded to report.prepare_scope.
    # aws, aws_fixes -- forwarded untouched to report.create_cloud_table.
    # verbose      -- prints a notice and is forwarded to the domain and
    #                 file report jobs.
    # html         -- when truthy, an HTML report is generated at the end.
    # screenshots  -- when truthy, the "Screenshot Snapper" job is added
    #                 to phase 2.

    def run_phase(phase_jobs):
        """Start every process in phase_jobs, then wait for all of them."""
        for job in phase_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        # Join only after everything has been started so the jobs of one
        # phase run concurrently.
        for job in phase_jobs:
            job.join()

    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))
    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information "
                "is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    # Everything below only runs when this module is executed as a script.
    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs.
        # The context manager shuts the server process down once the jobs
        # and the reporter no longer need the shared lists.
        with Manager() as manager:
            ip_list = manager.list()
            domain_list = manager.list()

            # Create reporter object and generate final list, the scope from scope file
            report = reporter.Reporter(output_report)
            report.create_tables()
            scope, ip_list, domain_list = report.prepare_scope(
                ip_list, domain_list, scope_file, domain)

            # Create some jobs and put Python to work!
            # Job queue 1 is for the initial phase
            jobs = [
                Process(name="Company Info Collector",
                        target=report.create_company_info_table,
                        args=(domain, )),
                Process(name="Employee Hunter",
                        target=report.create_people_table,
                        args=(domain, organization)),
                Process(name="Domain and IP Address Recon",
                        target=report.create_domain_report_table,
                        args=(scope, ip_list, domain_list, verbose)),
            ]
            # Job queue 2 is used for jobs using data from job queue 1
            more_jobs = [
                Process(name="Shodan Queries",
                        target=report.create_shodan_table,
                        args=(ip_list, domain_list)),
                Process(name="Domain Squatting Recon",
                        target=report.create_urlcrazy_table,
                        args=(organization, domain)),
            ]
            # Job queue 3 is used for jobs that take a while and use the
            # progress bar, i.e. AWS enum
            even_more_jobs = [
                Process(name="Cloud Recon",
                        target=report.create_cloud_table,
                        args=(organization, domain, aws, aws_fixes)),
            ]

            # Optional jobs are appended last so the start order of the
            # mandatory jobs in each phase is unchanged.
            if screenshots:
                more_jobs.append(
                    Process(name="Screenshot Snapper",
                            target=report.capture_web_snapshots,
                            args=(report_path, )))
            if files:
                jobs.append(
                    Process(name="File Hunter",
                            target=report.create_foca_table,
                            args=(domain, ext, delete, report_path, verbose)))

            print(
                green(
                    "[+] Beginning initial discovery phase! This could take some time..."
                ))
            run_phase(jobs)

            print(
                green(
                    "[+] Initial discovery is complete! Proceeding with additional queries..."
                ))
            run_phase(more_jobs)

            print(green("[+] Final phase: checking the cloud and web services..."))
            run_phase(even_more_jobs)

            report.close_out_reporting()

        print(
            green("[+] Job's done! Your results are in {}.".format(
                output_report)))

        if html:
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
def _multi_channel_apply_disk_parallel(self, function, cleanup_function,
                                       output_path, from_time, to_time,
                                       channels, cast_dtype,
                                       pass_batch_info, pass_batch_results,
                                       processes, **kwargs):
    """Run ``function`` over every batch in a process pool, writing one file.

    Each batch is processed by ``util.batch_runner`` in a worker process.
    Workers then take turns appending their result to ``output_path`` in
    batch order, coordinated through a shared ``next_to_write`` counter.

    Parameters
    ----------
    function, cleanup_function, cast_dtype, pass_batch_info
        Forwarded unchanged to ``util.batch_runner``.
    output_path
        Destination file; converted to ``Path``. Batch 0 truncates it
        ('wb'), later batches append ('ab').
    from_time, to_time, channels
        Forwarded to ``self.multi_channel`` and ``self.indexer.n_batches``
        to select the batches.
    pass_batch_results
        Must be falsy; not supported in this mode.
    processes
        Passed to ``Pool`` as the number of worker processes.
    **kwargs
        Forwarded to ``util.batch_runner`` as a single dict argument.

    Returns
    -------
    tuple
        ``(output_path, params)`` where ``params`` is whatever
        ``util.make_metadata`` returns.

    Raises
    ------
    NotImplementedError
        If ``pass_batch_results`` is truthy.
    Exception
        Any exception raised in a worker is re-raised here (also when the
        progress bar is enabled).
    """
    self.logger.debug('Starting parallel operation...')

    if pass_batch_results:
        raise NotImplementedError("pass_batch_results is not "
                                  "implemented on 'disk' mode")

    # need to convert to a list, otherwise cannot be pickled
    data = list(self.multi_channel(from_time, to_time, channels,
                                   return_data=False))
    n_batches = self.indexer.n_batches(from_time, to_time, channels)

    self.logger.info('Data will be splitted in %s batches', n_batches)

    output_path = Path(output_path)

    # create local variables to avoid pickling problems
    _path_to_recordings = copy(self.path_to_recordings)
    _dtype = copy(self.dtype)
    _n_channels = copy(self.n_channels)
    _data_order = copy(self.data_order)
    _loader = copy(self.loader)
    _buffer_size = copy(self.buffer_size)

    reader = partial(RecordingsReader,
                     path_to_recordings=_path_to_recordings,
                     dtype=_dtype,
                     n_channels=_n_channels,
                     data_order=_data_order,
                     loader=_loader,
                     return_data_index=True)

    m = Manager()
    pool = None

    try:
        mapping = m.dict()
        next_to_write = m.Value('i', 0)
        # set to 1 by the first worker that fails, so workers waiting for
        # their turn to write can stop instead of spinning forever
        failed = m.Value('i', 0)

        def parallel_runner(element):
            # imported here so the function carries no extra closure state
            import time

            i, _ = element

            try:
                res = util.batch_runner(element, function, reader,
                                        pass_batch_info, cast_dtype,
                                        kwargs, cleanup_function,
                                        _buffer_size,
                                        save_chunks=False,
                                        output_path=output_path)

                if i == 0:
                    mapping['dtype'] = str(res.dtype)

                # wait until every previous batch has been written
                while next_to_write.value != i:
                    if failed.value:
                        # an earlier batch failed; its exception is the
                        # one reported to the parent, so just stop
                        return
                    time.sleep(0.005)

                with open(str(output_path), 'wb' if i == 0 else 'ab') as f:
                    res.tofile(f)

                next_to_write.value += 1
            except BaseException:
                failed.value = 1
                raise

        # run jobs
        self.logger.debug('Creating processes pool...')

        pool = Pool(processes)
        async_result = pool.map_async(parallel_runner, enumerate(data))

        if self.show_progress_bar:
            pbar = tqdm(total=n_batches)
            finished = 0

            try:
                while True:
                    # sample readiness *before* the counter so the last
                    # increment is never missed by the final update
                    done = async_result.ready()
                    written = next_to_write.value

                    if written > finished:
                        pbar.update(written - finished)
                        finished = written

                    if done:
                        break

                    # sleeps up to 0.1s but returns early on completion
                    async_result.wait(0.1)
            finally:
                pbar.close()

        # blocks until done and re-raises any worker exception
        async_result.get()

        # read before the manager (which owns the dict) is shut down
        output_dtype = mapping['dtype']
    finally:
        if pool is not None:
            # all work has either finished or failed at this point
            pool.terminate()
            pool.join()
        m.shutdown()

    # save metadata
    params = util.make_metadata(channels, self.n_channels,
                                output_dtype, output_path)

    return output_path, params