Python Manager.Manager Examples, multiprocess.Manager.Manager Python Examples

Example #1

0

Show file

def spin_crawl_threads(state, classifiers, MAX_BIT_SIZE, MAX_DL_THREADS, image_path):
    """Run the location -> download -> classification pipeline and wait for it.

    Three processes are wired together through two bounded manager queues:
    the location generator feeds ``location_q`` (at most 16 items), the
    downloader moves work from ``location_q`` to ``image_q`` (at most 64
    items) and the classifier consumes ``image_q``. ``state`` is handed to
    the classifier together with a manager lock that guards it.

    The call blocks until all three processes have exited. An ``atexit``
    hook terminates any children that are still alive at interpreter exit.
    """
    print("Running threads...")
    shared = Manager()

    locations = shared.Queue(maxsize=16)
    images = shared.Queue(maxsize=64)
    lock_for_state = shared.Lock()

    producer = Process(
        target=generate_location_thread,
        args=(locations, MAX_BIT_SIZE),
        name="generate_location",
    )
    classifier = Process(
        target=classification_thread,
        args=(images, classifiers, image_path, state, lock_for_state),
        name="classification",
    )
    downloader = Process(
        target=download_image_thread,
        args=(locations, images, MAX_DL_THREADS),
        name="download_image",
    )

    # Consumers are started (and later joined) before the producer.
    launch_order = (downloader, classifier, producer)
    for worker in launch_order:
        worker.start()

    def kill_threads():
        for child in active_children():
            child.terminate()

    atexit.register(kill_threads)

    for worker in launch_order:
        worker.join()

Example #2

0

Show file

File: multilabel_naive_bayes.py Project: machine-learning-2018-2019-fasilkom-ui/SELAMAT-BELAJAR-SEMOGA-SUKSES

    def fit(self, X, Y):
        """Train one binary classifier per distinct label, spread over processes.

        The distinct labels found in ``Y`` are split into ``self.n_jobs``
        groups; each group is handled by one process running
        ``sequential_execute``. ``X`` and ``Y`` are shared with the workers
        through manager list proxies. One item is read from the output queue
        per distinct label, and the collected items are turned into the
        ``self.classifiers`` dict.

        May only be called once per instance (asserts ``not self.fit_done``).
        """
        assert not self.fit_done
        assert len(X) == len(Y)

        possible_labels = list({label for labels in Y for label in labels})
        job_labels = np.array_split(possible_labels, self.n_jobs)

        with Manager() as manager:
            X_proxy = manager.list(X)
            Y_proxy = manager.list(Y)
            output_queue = Queue()

            processes = []
            for job in job_labels:
                # One kwargs dict per label handled by this worker.
                task_kwargs = [
                    {'X': X_proxy, 'Y': Y_proxy, 'label': lbl, 'return_label': True}
                    for lbl in job
                ]
                processes.append(
                    Process(target=sequential_execute,
                            args=(output_queue, get_binary_clf_from_multilabel,
                                  task_kwargs)))

            for worker in processes:
                worker.start()

            # Exactly one queue item is expected per label.
            # (original author's note: needs to be flattened)
            results = [output_queue.get() for _ in possible_labels]

            for worker in processes:
                worker.join()

        self.classifiers = dict(results)
        self.fit_done = True

Example #3

0

Show file

def main():
    """
    Split the input file by month, run ``maker`` for 13 parts in a pool of
    6 workers, collect one result per part from its queue, then dump each
    result to ``guess_par_t<i>.json`` and write the combined CSV.

    NOTE(review): with the standard-library ``multiprocessing.Pool`` the
    third positional argument of ``map()`` is ``chunksize``, not a second
    iterable. The call below only pairs indices with queues if ``Pool`` is a
    pool whose ``map()`` accepts several iterables (pathos-style) -- confirm
    which ``Pool`` this module imports.
    """
    file_to_attack = './data/example_files/S_hecht_submission_3.csv'
    method_order = 'param'
    nb_element = 1
    month_spliter(file_to_attack)

    manager = Manager()
    queue_list = [manager.Queue(1) for _ in range(13)]
    with Pool(6) as p:
        print("HEY")
        p.map(maker, list(range(13)), queue_list)

    # Each queue has capacity 1 and is read exactly once.
    for part_queue in queue_list:
        GUESS_PART.append(part_queue.get())
    GUESS_PART.sort()

    for month in range(NB_MONTH):
        json_name = "guess_par_t" + str(month) + ".json"
        with open(json_name, "w") as jsdump:
            json.dump(GUESS_PART[month][1], jsdump, indent=4)

    # The payload (index 1) of the first 13 sorted entries, in order.
    write_csv(*[GUESS_PART[k][1] for k in range(13)])

Example #4

0

Show file

 def add_progress_counter(self, init_mess="Beginning", end_mess="Done",
                          name_things='things', perc_interv=5):
     """Attach a ProgressCounter sized to ``len(self.data)`` and a Manager.

     The counter is stored as ``self.PG`` and a freshly started manager as
     ``self.manager``; the counter is then registered on the manager under
     the type id "PG".

     NOTE(review): ``register`` is a classmethod whose second argument is
     documented as a callable that creates the shared object, and it is
     called here after the manager has already been started -- confirm that
     this registration has the intended effect.
     """
     counter_options = dict(init_mess=init_mess,
                            end_mess=end_mess,
                            nmb_max=len(self.data),
                            name_things=name_things,
                            perc_interv=perc_interv)
     self.PG = ProgressCounter(**counter_options)
     self.manager = Manager()
     self.manager.register("PG", self.PG)

Example #5

0

Show file

def ospf_check():
    """Run ``_inf_ospf_check`` for every device listed in ``devicesFile``.

    The device identifier is the first comma-separated field of each line.
    The checks run in a pool of ``processor`` workers and share one manager
    lock, which is passed as the first argument to ``_inf_ospf_check``.
    """
    clear_log()
    # Read the device list inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(devicesFile) as device_file:
        devices = [line.split(',')[0] for line in device_file]
    pool = Pool(processor)
    lock = Manager().Lock()
    list(pool.map(partial(_inf_ospf_check, lock), devices))
    pool.close()
    pool.join()

Example #6

0

Show file

def svlan_check():
    """Collect SVLAN data for every 'Olt' node and record it via ``svlan_entry``.

    Each node becomes an ``(ip, company, area)`` tuple. The tuples are
    processed by a pool of 16 workers; ``svlan_entry`` receives a shared
    manager lock as its first argument.
    NOTE(review): ``compose`` presumably applies ``get_svlan`` first and then
    the lock-bound ``svlan_entry`` -- confirm against the imported helper.
    """
    clear_log()
    # Alternative node filters kept for debugging:
    #  nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    #  nodes = graph.find('Olt', property_key='company', property_value='zte')
    olts = []
    for node in graph.find('Olt'):
        olts.append((node['ip'], node['company'], node['area']))
    #  list(map(compose(card_entry, get_card), olts))
    workers = Pool(16)
    write_entry = partial(svlan_entry, Manager().Lock())
    pipeline = compose(write_entry, get_svlan)
    list(workers.map(pipeline, olts))
    workers.close()
    workers.join()

Example #7

0

Show file

File: switch.py Project: sjava/olt

def interface_check_m():
    """Fetch interface data for every Switch node and write it out.

    The Cypher query returns ``(ip, model)`` per switch. The pairs are
    processed by a pool of 16 workers; ``output_interface_m`` receives a
    shared manager lock as its first argument.
    NOTE(review): ``compose`` presumably runs ``get_interface`` before the
    lock-bound writer -- confirm against the imported helper.
    """
    clear_log()
    # Narrower queries kept for debugging:
    #  cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    #  cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    cmd = "match(s: Switch)  return s.ip, s.model"
    switchs = []
    for row in graph.cypher.execute(cmd):
        switchs.append((row[0], row[1]))
    workers = Pool(16)
    writer = partial(output_interface_m, Manager().Lock())
    pipeline = compose(writer, get_interface)
    list(workers.map(pipeline, switchs))
    workers.close()
    workers.join()

Example #8

0

Show file

def zte_gpon_svlan_check():
    """Check SVLANs on every Olt that has at least one 'GTGO' card.

    The query yields ``(ip, [slots])`` per Olt. Each pair is expanded into a
    ``zte_gpon_svlan(ip=..., slots=...)`` call and its result is passed on
    to ``svlan_entry`` together with a shared manager lock. Work is spread
    over a pool of 8 workers.
    NOTE(review): the mapped callable contains a lambda, which the
    standard-library pickler cannot serialise -- presumably the imported
    ``Pool`` uses a dill-based serialiser; confirm.
    """
    clear_log()
    query_text = (
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)"
    )
    rows = graph.cypher.execute(query_text)
    # Lazy on purpose: the pairs are only materialised when the pool maps them.
    olts = ((row[0], row[1]) for row in rows)
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])
    workers = Pool(8)
    write_entry = partial(svlan_entry, Manager().Lock())
    list(workers.map(compose(write_entry, lzte_gpon_svlan), olts))
    workers.close()
    workers.join()

Example #9

0

Show file

    def run_parallel(self,
                     test_suites,
                     test_runner,
                     result_type=None,
                     results_path=None):
        """Run every test suite in its own process and summarise the outcome.

        Each suite is given a fresh UUID and executed via
        ``self.execute_test`` in a separate process; the workers write into
        a shared manager dict. After all processes have been joined the
        results are passed to ``self.dump_results`` and, when ``result_type``
        is given, a report is generated.

        :param test_suites: iterable of test suites to execute
        :param test_runner: runner object forwarded to ``self.execute_test``
        :param result_type: report format; ``None`` disables report generation
        :param results_path: destination path forwarded to the reporter
        :return: ``1`` if ``dump_results`` reported failures or errors,
                 otherwise ``0``
        """
        unittest.installHandler()
        manager = Manager()
        results = manager.dict()
        start = time.time()

        processes = []
        test_mapping = {}
        for test_suite in test_suites:
            # Give each test suite an uuid so it can be
            # matched to the correct test result
            test_id = str(uuid.uuid4())
            test_mapping[test_id] = test_suite

            proc = Process(target=self.execute_test,
                           args=(test_runner, test_id, test_suite, results))
            processes.append(proc)
            proc.start()

        for proc in processes:
            proc.join()

        finish = time.time()

        errors, failures, _ = self.dump_results(start, finish, results)

        if result_type is not None:
            all_results = []
            result_parser = None
            for test_id, result in list(results.items()):
                tests = test_mapping[test_id]
                result_parser = SummarizeResults(vars(result), tests,
                                                 (finish - start))
                all_results += result_parser.gather_results()

            # With no results there is no parser to hand to the reporter;
            # skip the report instead of failing on an unbound name.
            if result_parser is not None:
                reporter = Reporter(result_parser=result_parser,
                                    all_results=all_results)
                reporter.generate_report(result_type=result_type,
                                         path=results_path)

        return 1 if failures or errors else 0

Example #10

0

Show file

def query(query_lst):
    """Search DBLP for every query string and return the enriched hits.

    Each query is sent to the DBLP publication search API (up to 100 hits,
    JSON format). All hits are then post-processed, one process per hit:
    the author entry is normalised and a BibTeX record is attached. The
    processed ``info`` dicts are collected in a shared manager dict keyed by
    the hit's position in the combined result list.

    Raises ``Error`` when the API answers with HTTP 429.
    NOTE(review): ``Error`` is not defined in the visible code -- confirm it
    is importable where this function lives.

    :param query_lst: iterable of query strings
    :return: plain dict mapping hit position -> hit info dict
    """
    manager = Manager()
    hits = manager.dict()

    results = []
    for q in query_lst:
        search_params = {'q': q, 'h': 100, 'format': 'json'}
        r = requests.get('http://dblp.uni-trier.de/search/publ/api',
                         params=search_params)

        if r.status_code == 429:
            raise Error

        res = r.json()["result"]["hits"].get("hit")
        if res is not None:
            results += res

    def f(d, hit, n):
        if hit is None:
            return

        info = hit["info"]
        author_field = info.pop("authors")["author"]
        if isinstance(author_field, dict):
            # Single author: stored as the bare text value.
            info["authors"] = author_field["text"]
        else:
            info["authors"] = [fullname(a["text"]) for a in author_field]

        info["bibtex"] = get_bib(info["key"])
        d[n] = info

    job = [
        Process(target=f, args=(hits, hit, n)) for n, hit in enumerate(results)
    ]
    for worker in job:
        worker.start()
    for worker in job:
        worker.join()

    return dict(hits)

Example #11

0

Show file

    def get_city_states(self):
        """
        Creates city states from start time to end time.

        A time index is generated between ``self.start_time`` and
        ``self.end_time`` at ``self.time_unit_duration``-minute steps and
        restricted to the configured business days and hours. Every
        remaining timestamp is the centre of a time slice of
        ``self.time_slice_duration`` minutes. ``self.get_city_state`` is then
        mapped over all slice positions in a process pool, receiving
        ``[shared_dict, position]``.
        :param:
        :return: plain dict copied from the shared manager dict
        """
        creator_cfg = self.config['city_state_creator']
        business_days = creator_cfg['business_days']
        first_hour = creator_cfg['business_hours_start']
        last_hour = creator_cfg['business_hours_end']

        # Candidate slice centres between the start and end time
        index = pd.date_range(start=self.start_time,
                              end=self.end_time,
                              freq=str(self.time_unit_duration) + 'min')

        # Keep only the required days and hours (both hour bounds inclusive)
        index = index[index.day_name().isin(business_days)]
        in_hours = (index.hour >= first_hour) & (index.hour <= last_hour)
        index = index[in_hours]

        half_slice = timedelta(minutes=self.time_slice_duration / 2)
        time_slice_starts = index - half_slice
        time_slice_ends = index + half_slice

        # Arguments consumed by the parallel workers
        self.parallel_args = self.create_parallel_args(index,
                                                       time_slice_starts,
                                                       time_slice_ends)

        # Shared result container
        manager = Manager()
        city_states = manager.dict()
        N = len(index.values)

        self.logger.info("Creating parallelization pool")
        pool = ProcessPool(nodes=25)
        pool.map(self.get_city_state, ([city_states, t] for t in xrange(N)))
        pool.close()
        pool.join()
        pool.clear()
        self.logger.info("Finished creating city states")

        return dict(city_states)

Example #12

0

Show file

def hostname_check():
    """Collect the hostname of every Olt and store it on its graph node.

    Phase 1: each ``(ip, company)`` pair is processed by a pool of 16
    workers; ``hostname_entry`` receives a shared manager lock as its first
    argument.
    Phase 2: ``result_file`` is read back line by line as comma-separated
    ``ip,hostname`` records and each hostname is written to the matching
    Olt node.
    NOTE(review): fields are not stripped, so if a line has exactly two
    fields the hostname keeps its trailing newline -- confirm the file
    format written by ``hostname_entry``.
    """
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()

    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    # Context manager closes the result file deterministically; a plain loop
    # replaces list(map(lambda ...)) that was used only for its side effects.
    with open(result_file) as results:
        for line in results:
            fields = line.split(',')
            graph.cypher.execute(cmd, ip=fields[0], hostname=fields[1])

Example #13

0

Show file

File: task51.py Project: fresh1987/multiprocessing

    def create_csv(self):
        """Write both CSV headers, parse all zips in parallel, print the timing.

        The work is only done when the module runs as ``__main__``: the two
        output files are (re)created with their header rows, then
        ``self.parse_Zip`` is mapped over the indices of
        ``self.list_of_zips`` in a process pool, with a shared manager lock
        as its first argument. The elapsed time is printed in every case.
        """
        # Start the clock before the guard: the timing line below is printed
        # unconditionally and must not depend on a name bound inside the guard.
        t1 = time()
        if __name__ == '__main__':
            # Truncate the outputs and write the header rows; the context
            # managers close the files before the workers start.
            with open(self.out_csv1, "w") as file1:
                file1.write("id,level\n")
            with open(self.out_csv2, "w") as file2:
                file2.write("id,object_name\n")

            i = range(len(self.list_of_zips))
            p = Pool()
            m = Manager()
            l = m.Lock()
            func = partial(self.parse_Zip, l)
            p.map(func, i)
            p.close()
            p.join()
        print('Create .csv files time = ' + str(time() - t1) + 's')

Example #14

0

Show file

def zhongji_check():
    """Collect trunk data for every Olt and merge it into the graph.

    Phase 1: each ``(ip, company)`` pair is processed by a pool of 16
    workers; ``zhongji_entry`` receives a shared manager lock as its first
    argument.
    Phase 2: ``result_file`` is read back line by line as comma-separated
    records; the first three fields are used as ``ip``, ``sm`` and
    ``interface`` to merge an Etrunk node and its Port under the Olt.
    NOTE(review): fields are not stripped, so the last field of a line may
    keep its trailing newline -- confirm the file format written by
    ``zhongji_entry``.
    """
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()

    # Same query text as before, spelled with explicit newlines so the
    # trailing blanks inside the statement are visible.
    cmd = ("match(n: Olt) where n.ip = {ip} \n"
           "    merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) \n"
           "    merge(m) - [:Include]->(p: Port{name: {interface}})")
    # Context manager closes the result file deterministically; a plain loop
    # replaces list(map(lambda ...)) that was used only for its side effects.
    with open(result_file) as results:
        for line in results:
            fields = line.split(',')
            graph.cypher.execute(
                cmd, ip=fields[0], sm=fields[1], interface=fields[2])

Example #15

0

Show file

def run_post_process():
    """Process unknown records in ``cpu_count() - 2`` parallel processes.

    One ``_generator(lock, shared_dict, es)`` object is built per worker;
    all of them share the same manager lock and the same manager dict
    (initialised to ``{'time': 0, 'id': ''}``). Every worker process runs
    ``_process_unknown_record`` on its own generator object. The call blocks
    until all workers have finished.
    """
    es = ES(FLAGS.configfile_name)
    manager = Manager()
    lock = manager.Lock()
    shared_dict = manager.dict({'time': 0, "id": ""})
    process_num = int(cpu_count() - 2)

    # All generator objects are created before the first process is started.
    generator_list = [
        _generator(lock, shared_dict, es) for _ in range(process_num)
    ]

    workers = []
    for record_source in generator_list:
        worker = Process(target=_process_unknown_record, args=(record_source,))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()

Example #16

0

Show file

def download_image_thread(location_q, image_q, MAX_DL_THREADS=10):
    """Forever move locations from ``location_q`` to images on ``image_q``.

    Locations are taken from ``location_q`` and downloaded in a pool of
    ``MAX_DL_THREADS`` workers; each ``(location, image)`` pair is put on
    ``image_q``. A semaphore with ``2 * MAX_DL_THREADS`` slots bounds the
    number of downloads that are running or scheduled at any time.

    This function never returns.

    :param location_q: queue of locations to download
    :param image_q: queue receiving ``(location, image)`` tuples
    :param MAX_DL_THREADS: size of the download pool
    """
    print("Running Download Image Thread.")

    max_processes = MAX_DL_THREADS
    print("Creating a thread pool of size {} for downloading images...".format(max_processes))
    pool = Pool(processes=max_processes)
    # Allow us to have n processes running, and n processes scheduled to run
    # TODO: Manager is not necessary here, but is used to get around the fact
    # that thread-safe objects cannot be passed by reference, they must be
    # inherited. A more lightweight solution should be found
    workers = Manager().Semaphore(max_processes*2)

    def async_download(location):
        try:
            image = download_image(location)
            image_q.put((location, image), True)
        finally:
            # Always hand the slot back, even when the download or the put
            # fails; otherwise every failure permanently consumes a slot and
            # the dispatch loop below eventually blocks forever on acquire().
            workers.release()

    while True:
        location = location_q.get(True)
        workers.acquire()
        pool.apply_async(async_download, (location,))

Example #17

0

Show file

File: folderbased_cut_speech.py Project: sunclb/jumpstartcourse

def folderbase_cut_silence(input_folder, cut_interval):
    """Strip silence from every ``.wav`` file found under ``input_folder``.

    Results are written to ``<input_folder>/remove_silence`` (created if
    missing). Each successfully processed source file is deleted. Progress
    is tracked in the module-level shared counters and summarised in one log
    line at the end.

    NOTE(review): only bare file names are collected from ``os.walk`` and
    they are later joined to ``input_folder``, so files in sub-directories
    (including the output folder itself) resolve to paths that may not
    exist -- confirm whether recursion is intended.
    NOTE(review): ``pool.map`` is called with two iterables, which the
    standard-library ``multiprocessing.Pool`` does not support (its third
    argument is ``chunksize``); presumably ``Pool`` is a pathos-style pool --
    confirm which ``Pool`` this module imports.

    :param input_folder: folder containing the wav files
    :param cut_interval: forwarded to ``cut_wav_without_silence``
    """
    output_no_silence = os.path.join(input_folder, "remove_silence")
    if not os.path.exists(output_no_silence):
        os.mkdir(output_no_silence)
    wav_files = []
    for _root, _dirs, files in os.walk(input_folder):
        wav_files.extend(files)

    def process_files(lock, file):
        try:
            # exclude log.txt file
            # Raw string: "\." in a normal literal is an invalid escape
            # sequence and triggers a warning on current Python versions.
            if re.search(r".+\.wav", file):
                wave_file = os.path.join(input_folder, file)
                wo_num = cut_wav_without_silence(wave_file, output_no_silence,
                                                 cut_interval)
                with cut_silence_file_num.get_lock():
                    cut_silence_file_num.value += 1
                with cut_silence_out_file_num.get_lock():
                    cut_silence_out_file_num.value += wo_num
                os.remove(wave_file)

        except Exception as e:
            # Best effort: a failing file is logged and counted, not fatal.
            logging.info(e)
            with cut_silence_fail_file.get_lock():
                cut_silence_fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    pool.map(process_files, locks, wav_files)
    # Same text as before (including the trailing newline and two tabs),
    # written with explicit escapes.
    loginfo = ('Total number of audio files processed is {}, generated {} '
               'files and {} files failed\n\t\t').format(
                   cut_silence_file_num.value, cut_silence_out_file_num.value,
                   cut_silence_fail_file.value)
    logging.info(loginfo)

Example #18

0

Show file

    def __init__(self,
                 initial_patterns_list,
                 input_text_file,
                 data_source_type,
                 read_file_format='rb'):
        """Set up the shared pattern table and the data source wrapper.

        Every initial pattern is wrapped in a ``PatternData`` object and
        stored in a manager dict keyed by the object's ``formatted``
        attribute. The smallest and largest whitespace-separated word count
        among those keys are kept as ``min_pattern_len`` and
        ``max_pattern_len``.

        :param initial_patterns_list: patterns to start from (must not be
            empty, since ``min``/``max`` are taken over it)
        :param input_text_file: forwarded to ``data_wrapper_factory``
        :param data_source_type: forwarded to ``data_wrapper_factory``
        :param read_file_format: stored as ``self.read_format``
        """
        self.read_format = read_file_format
        self.pattern_to_data = {}  #defaultdict(PatternData)

        wrapped = [PatternData(raw) for raw in initial_patterns_list]
        by_formatted = {entry.formatted: entry for entry in wrapped}
        self.pattern_to_data = Manager().dict(by_formatted)

        word_counts = [
            len(key.split()) for key in self.pattern_to_data.keys()
        ]
        self.min_pattern_len = min(word_counts)
        self.max_pattern_len = max(word_counts)
        self.data_wrapper = data_wrapper_factory(input_text_file,
                                                 data_source_type)

Example #19

0

Show file

File: switch.py Project: sjava/weihu

def add_power_info():
    """Collect power information for every switch whose SNMP state is normal.

    A table of per-model ``get_power_info`` implementations is bound into
    ``_model``; the resulting callable is composed with ``_add_power_info``
    (which receives a shared manager lock first) and mapped over
    ``{'ip', 'model'}`` dicts in a pool of ``processor`` workers.
    NOTE(review): ``_model`` presumably picks the implementation matching
    the switch model -- confirm against its definition.
    """
    handlers = {
        'S8508': S85.get_power_info,
        'S8505': S85.get_power_info,
        'T64G': T64.get_power_info,
        'S8905': S89.get_power_info,
        'S8905E': S8905E.get_power_info,
        'S9306': S93.get_power_info,
        'S9303': S93.get_power_info
    }
    get_power_info = partial(_model, handlers)
    #  clear_log()
    rows = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model"
    )
    switches = [{'ip': row['ip'], 'model': row['model']} for row in rows]
    workers = Pool(processor)
    store = partial(_add_power_info, Manager().Lock())
    list(workers.map(compose(store, get_power_info), switches))
    workers.close()
    workers.join()

Example #20

0

Show file

    def sample(self, n_samples: int, beta: float = 1.):
        """Draw ``n_samples`` rounds of samples from all chains in parallel.

        Each chain gets one work queue and one return queue (both of size 1).
        Workers are initialised through ``self.parallel_pool`` and then run
        ``worker_run`` asynchronously. In every round the main process sends
        one work item per chain, waits for one sample per chain, swaps the
        samples via ``self.swap_samples`` and adjusts the temperatures via
        ``self.adjust_betas``. After the last round the workers are told to
        stop, their sampler objects are collected into ``self.samplers`` and
        the pool is closed and joined.

        The method has no return statement.

        NOTE(review): the ``beta`` parameter is never read; the name is
        rebound by the loops below before any use.
        """
        with Manager() as mgr:
            queues_work = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]
            queues_return = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]

            # Hand every worker its queue pair, chain index and sampler.
            _ = self.parallel_pool.starmap(
                func=worker_init,
                iterable=[(queues_work[idx], queues_return[idx], idx, self.samplers[idx])
                          for idx in range(self.num_chains)])

            # One long-running worker_run task per chain.
            worker_results = [self.parallel_pool.apply_async(func=worker_run) for _ in range(self.num_chains)]

            swapped = [None for _ in self.samplers]
            last_samples = [None for _ in self.samplers]
            for i_sample in tqdm(range(int(n_samples))):
                logger.debug('MAIN PROCESS: deploying work...')
                # Work item layout: (chain index, swapped sample, beta, stop flag).
                for idx, beta in enumerate(self.betas):
                    queues_work[idx].put((idx, copy.deepcopy(swapped[idx]), beta, False))  # sample
                logger.debug('MAIN PROCESS: waiting for return...')
                for idx in range(len(self.samplers)):
                    # The index reported by the worker (not the loop index)
                    # decides where the sample is stored.
                    idx, last_sample, beta = queues_return[idx].get()  # get sample
                    last_samples[idx] = last_sample
                logger.debug('MAIN PROCESS: swapping samples...')
                swapped = self.swap_samples(last_samples)  # swap samples
                # NOTE(review): this message repeats the previous one although
                # the following call adjusts the betas.
                logger.debug('MAIN PROCESS: swapping samples...')
                self.adjust_betas(i_sample, swapped, last_samples)  # adjust temps
            # logger.debug('stopping workers...')
            # A work item whose last element is True is the stop signal.
            _ = [queues_work[idx].put((idx, None, 0.00, True)) for idx in range(self.num_chains)]
            # presumably relies on the workers calling task_done() for every
            # item they take -- verify against worker_run
            _ = [queues_work[idx].join() for idx in range(self.num_chains)]
            # logger.debug('reached getting from finalqueue')
            for worker_result in worker_results:
                idx, sampler_obj = worker_result.get()
                logger.debug(f'GATHERED sampler {idx} trace_x: {len(sampler_obj.trace_x)}')
                self.samplers[idx] = sampler_obj

            self.parallel_pool.close()
            self.parallel_pool.join()

Example #21

0

Show file

File: switch.py Project: sjava/weihu

def add_traffics():
    """Collect interface traffic for every switch whose SNMP state is normal.

    A table of per-model ``get_traffics`` implementations is bound into
    ``_model``; the resulting callable is composed with ``_add_traffics``
    (which receives a shared manager lock first) and mapped over
    ``{'ip', 'infs', 'model'}`` dicts in a pool of ``processor`` workers.
    NOTE(review): ``_model`` presumably picks the implementation matching
    the switch model -- confirm against its definition.
    """
    handlers = {
        'S8508': S85.get_traffics,
        'S8505': S85.get_traffics,
        'T64G': T64.get_traffics,
        'S8905': S89.get_traffics,
        'S8905E': S8905E.get_traffics,
        'S9306': S93.get_traffics,
        'S9303': S93.get_traffics
    }
    get_traffics = partial(_model, handlers)
    #  clear_log()
    rows = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model"
    )
    switchs = [
        {'ip': row['ip'], 'infs': row['infs'], 'model': row['model']}
        for row in rows
    ]
    workers = Pool(processor)
    store = partial(_add_traffics, Manager().Lock())
    list(workers.map(compose(store, get_traffics), switchs))
    workers.close()
    workers.join()

Example #22

0

Show file

def folderbase_convert_to_wave(webmfolder, wavefolder):
    """Convert every non-wav file yielded by ``mp3gen(webmfolder)`` to wav.

    Converted sources are deleted; sources whose conversion raises are moved
    to ``data/convert_failed`` and logged. Totals are tracked in the
    module-level shared counters and summarised in one log line.

    NOTE(review): ``pool.map`` is called with two iterables, which the
    standard-library ``multiprocessing.Pool`` does not support (its third
    argument is ``chunksize``); presumably ``Pool`` is a pathos-style pool --
    confirm which ``Pool`` this module imports.

    :param webmfolder: folder scanned for input files
    :param wavefolder: destination forwarded to ``convert_to_wav``
    """
    def process_convert(lock, filename):
        my_logger.debug("filename is {}".format(filename))
        with total_num.get_lock():
            total_num.value += 1
        try:
            success = convert_to_wav(filename, wavefolder)
            with success_num.get_lock():
                success_num.value += success
            os.remove(filename)
        except Exception as err:
            # Log a tab-separated record, then quarantine the source file.
            record = "\t".join([str(datetime.datetime.now()), filename, str(err)])
            my_logger.info(record)
            fail_folder = "data/convert_failed"
            if not os.path.exists(fail_folder):
                os.mkdir(fail_folder)
            failed_file = os.path.join(fail_folder, os.path.basename(filename))
            os.rename(filename, failed_file)
            with fail_num.get_lock():
                fail_num.value += 1
        return 1

    # Anything whose path contains "wav" is skipped.
    filenames = [
        candidate for candidate in mp3gen(webmfolder)
        if not re.search("wav", candidate)
    ]
    pool = Pool(process_num)
    shared_lock = Manager().Lock()
    locks = [shared_lock] * len(filenames)
    pool.map(process_convert, locks, filenames)

    summary = "{}/{} files successfully converted to wave and {} files failed"
    my_logger.info(
        summary.format(success_num.value, total_num.value, fail_num.value))

Example #23

0

Show file

File: folderbased_cut_speech.py Project: sunclb/jumpstartcourse

def folderbase_cut_interval(input_folder, output_folder, cut_period):
    """Cut every ``.wav`` file under ``input_folder`` into fixed-length pieces.

    Each source file is split into consecutive segments of ``cut_period``
    seconds, written to ``output_folder`` as ``<basename>_<index>.wav``; the
    source file is deleted afterwards. Progress is tracked in the
    module-level shared counters and summarised in one log line.

    NOTE(review): ``pool.map`` is called with two iterables, which the
    standard-library ``multiprocessing.Pool`` does not support (its third
    argument is ``chunksize``); presumably ``Pool`` is a pathos-style pool --
    confirm which ``Pool`` this module imports.

    :param input_folder: folder searched recursively for wav files
    :param output_folder: destination folder (created if missing)
    :param cut_period: segment length, in the unit expected by ``cut_wave``
        (seconds, judging by the nframes / framerate comparison)
    """
    wav_files = []
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    for root, _dirs, files in os.walk(input_folder):
        for filename in files:
            wav_files.append(os.path.join(root, filename))

    def process_files(lock, file):
        try:
            # Raw string: "\." in a normal literal is an invalid escape
            # sequence and triggers a warning on current Python versions.
            if re.search(r".+\.wav", file):
                with file_num.get_lock():
                    file_num.value += 1
                filebasename = os.path.basename(file)
                filebasename, _ = os.path.splitext(filebasename)
                # Only the duration of the source is needed here.
                with wave.open(file, mode='rb') as source_audio:
                    audio_duration = (source_audio.getnframes() /
                                      source_audio.getframerate())

                precut_duration = cut_period
                cut_start = 0
                index = 0
                while cut_start < audio_duration:
                    cut_end = cut_start + precut_duration
                    cut_audio, cutaudio_prop = cut_wave(file,
                                                        cut_start,
                                                        cut_end,
                                                        start_bias=0,
                                                        end_bias=0)
                    newfile = os.path.join(
                        output_folder,
                        filebasename + "_" + str(index) + ".wav")
                    index += 1
                    with wave.open(newfile, "wb") as newAudio:
                        newAudio.setparams((cutaudio_prop["nchannels"],
                                            cutaudio_prop["sampwidth"],
                                            cutaudio_prop["framerate"],
                                            cutaudio_prop["nframes"],
                                            cutaudio_prop["comptype"],
                                            cutaudio_prop["compname"]))
                        newAudio.writeframes(cut_audio)
                    cut_start = cut_start + precut_duration
                    with out_file_num.get_lock():
                        out_file_num.value += 1
                os.remove(file)
        except Exception as e:
            # Best effort: a failing file is logged and counted, not fatal.
            logging.info(e)
            with fail_file.get_lock():
                fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    pool.map(process_files, locks, wav_files)
    # Same text as before (including the trailing newline and tab),
    # written with explicit escapes.
    loginfo = ('Total number of audio files processed is {}, generated {} '
               'files and {} files failed\n\t').format(
                   file_num.value, out_file_num.value, fail_file.value)
    logging.info(loginfo)

Example #24

0

Show file

File: start.py Project: varatep/open-horizon-examples

# Station settings read from the environment: location, units and the
# optional Weather Underground location / rapid-fire switch.
latitude = check_env_var("HZN_LAT", printerr=True)
longitude = check_env_var("HZN_LON", printerr=True)
# weewx recommends only using 'us'
pws_units = check_env_var("PWS_UNITS", default='us', printerr=True)
pws_wu_loc = check_env_var("PWS_WU_LOC", default='', printerr=True)
pws_wu_rapidfire = check_env_var("PWS_WU_RPDF", default='False', printerr=True)

# Normalise the rapid-fire flag: Horizon may deliver a lower-case boolean, and
# anything that is not "true"/"True" is treated as "False".
pws_wu_rapidfire = "True" if pws_wu_rapidfire in ("true", "True") else "False"


## Shared data structure (dict for flask server to read & serve)
manager = Manager()
sdata = manager.dict()
standard_params = ["wu_id", "stationtype", "model", "latitude", "longitude", "units", "location"]
standard_values = [pws_wu_id, pws_station_type, pws_model, latitude, longitude, pws_units, pws_wu_loc]
# "r": status record, "t": timestamp (whole seconds, as text), "i": station info
sdata["r"] = {"status": "Station initializing..."}
sdata["t"] = str(int(time.time()))
sdata["i"] = dict(zip(standard_params, standard_values))

## Flask HTTP server ----------------------------------------------------------
## Serve the shared dict from a separate process on 0.0.0.0:8357
p_flask = Process(target=fl.run_server, args=('0.0.0.0', 8357, sdata))
p_flask.start()

## Weewx service --------------------------------------------------------------
# Modify the weewx configuration file with our env var settings
weemod = weewx_mod(weewx_config_file, pws_station_type)

Example #25

0

Show file

File: SpatioTemporalESN.py Project: tianyuan445327/easyesn

    def predict(self, inputData, transientTime=0, update_processor=lambda x: x, verbose=0):
        """Predict an output series for ``inputData`` using a worker pool.

        One job is created per spatial index combination of the input. The
        jobs are processed by ``self._predictProcess`` in a process pool and
        every worker result arrives on a manager queue as
        ``(indices, prediction, state)``. The state is stored in
        ``self._xs`` and the prediction is written into the output array at
        the job's spatial indices.

        ``update_processor`` is not referenced in this method body.

        :param inputData: array whose number of dimensions must be
            ``self.n_inputDimensions + 1``
        :param transientTime: number of leading time steps excluded from the
            output; also published on ``self`` and ``self.sharedNamespace``
        :param verbose: ``> 0`` shows a progress bar, ``> 1`` additionally
            prints the number of finished jobs
        :return: array of shape ``(inputData.shape[0] - transientTime,
            *self.inputShape)``
        :raises ValueError: if ``inputData`` has the wrong number of dimensions
        """
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        # Queue on which the pool workers deliver their results.
        manager = Manager()
        predictQueue = manager.Queue()

        # workaround as predict does not support batches atm
        # add dummy dimension to let embedInputData work properly (is optimized to work for batches)
        inputData = inputData.reshape(1, *inputData.shape)
        modifiedInputData = self._embedInputData(inputData)
        modifiedInputData = modifiedInputData[0]
        inputData = inputData[0]

        self.transientTime = transientTime
        self.sharedNamespace.transientTime = transientTime
        predictionOutput = B.zeros(np.insert(self.inputShape, 0, inputData.shape[0] - transientTime))

        # One job per spatial index combination, each index shifted by the filter width.
        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[1:]]),
                        axis=rank).reshape(-1, rank).tolist()
        nJobs = len(jobs)

        self.resetState()

        iterator = PredictionArrayIterator(modifiedInputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_predictProcess,
                    initargs=[predictQueue, self])
        # NOTE(review): the AsyncResult is discarded, so an exception raised
        # in a worker is never surfaced here and the loop below would keep
        # waiting on the queue -- confirm this is acceptable.
        pool.map_async(self._predictProcess, iterator, chunksize=200)#, chunksize=1)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            # Blocks until exactly one result per job has been received.
            while nJobsDone < nJobs:
                data = predictQueue.get()
                # result of predicting
                indices, prediction, state = data
                id = self._uniqueIDFromIndices(indices)
                self._xs[id] = state
                # update the values
                predictionOutput[tuple([Ellipsis] + indices)] = prediction

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        # The pool is closed but not joined.
        pool.close()

        return predictionOutput

Example #26

0

Show file

File: SpatioTemporalESN.py Project: tianyuan445327/easyesn

    def fit(self, inputData, outputData, transientTime=0, verbose=0):
        """Fit the output weights for every spatial position using a worker pool.

        One job is created per spatial index combination of the input. The
        jobs are processed by ``self._fitProcess`` in a process pool and
        every worker result arrives on a manager queue as
        ``(indices, x, WOut)``. Depending on ``self._averageOutputWeights``
        the weights are either accumulated into ``self._WOut`` (each divided
        by ``np.prod(self.inputShape)``) or stored per position in
        ``self._WOuts``; ``x`` is always stored in ``self._xs``.

        The method has no return statement.

        :param inputData: array with ``self.n_inputDimensions + 1`` dimensions
            (a single series) or one more (several series; first axis is the
            series)
        :param outputData: target data, reshaped alongside ``inputData`` when
            a single series is given
        :param transientTime: number of leading time steps excluded when the
            lengths published on ``self.sharedNamespace`` are computed
        :param verbose: ``> 0`` shows a progress bar, ``> 1`` additionally
            prints the number of finished jobs
        :raises ValueError: if ``inputData`` has the wrong number of dimensions
        """
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions and rank != self.n_inputDimensions + 1:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        # reshape the input so that it has the shape (timeseries, time, input_dimension^n)
        if rank == self.n_inputDimensions:
            inputData = inputData.reshape(1, *inputData.shape)
            outputData = outputData.reshape(1, *outputData.shape)
        else:
            # modify rank again
            rank -= 1

        partialLength = (inputData.shape[1] - transientTime)
        totalLength = inputData.shape[0] * partialLength
        timeseriesCount = inputData.shape[0]

        # Queue on which the pool workers deliver their results.
        manager = Manager()
        fitQueue = manager.Queue()

        modifiedInputData = self._embedInputData(inputData)

        # Publish the run parameters on the shared namespace.
        self.sharedNamespace.transientTime = transientTime

        self.sharedNamespace.partialLength = partialLength
        self.sharedNamespace.totalLength = totalLength
        self.sharedNamespace.timeseriesCount = timeseriesCount

        # One job per spatial index combination, each index shifted by the filter width.
        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[2:]]),
                        axis=rank).reshape(-1, rank).tolist()

        nJobs = len(jobs)

        self.resetState()

        iterator = FittingArrayIterator(modifiedInputData, outputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_fitProcess, initargs=[fitQueue, self])
        # NOTE(review): the AsyncResult is discarded, so an exception raised
        # in a worker is never surfaced here and the loop below would keep
        # waiting on the queue -- confirm this is acceptable.
        pool.map_async(self._fitProcess, iterator, chunksize=16)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            # Blocks until exactly one result per job has been received.
            while nJobsDone < nJobs:
                data = fitQueue.get()

                # result of fitting
                indices, x, WOut = data
                id = self._uniqueIDFromIndices(indices)

                # A missing WOut marks a failed fit for this position.
                if WOut is None:
                    import sys
                    print("WARNING: Fit process for pixel {0} did not succeed".format(indices), file=sys.stderr)

                # store WOut
                if self._averageOutputWeights:
                    if WOut is not None:
                        self._WOut += WOut / np.prod(self.inputShape)
                else:
                    self._WOuts[id] = WOut

                    # store x
                self._xs[id] = x

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        # The pool is closed but not joined.
        pool.close()

Example #27

0

Show file

File: SpatioTemporalESN.py Project: tianyuan445327/easyesn

    def __init__(self, inputShape, n_reservoir,
                 filterSize=1, stride=1, borderMode="mirror", nWorkers="auto",
                 spectralRadius=1.0, noiseLevel=0.0, inputScaling=None,
                 leakingRate=1.0, reservoirDensity=0.2, randomSeed=None, averageOutputWeights=True,
                 out_activation=lambda x: x, out_inverse_activation=lambda x: x,
                 weightGeneration='naive', bias=1.0, outputBias=1.0,
                 outputInputScaling=1.0, inputDensity=1.0, solver='pinv', regressionParameters=None, activation=B.tanh,
                 activationDerivation=lambda x: 1.0 / B.cosh(x) ** 2):
        """Validate the spatial settings, allocate weight storage and set up workers.

        Args:
            inputShape: Spatial shape of the input; its length is the number
                of input dimensions and its product the number of pixels.
            n_reservoir: Reservoir size, forwarded to the base class.
            filterSize: Odd edge length of the filter.
            stride: Stride used together with `filterSize` to derive the
                number of inputs.
            borderMode: One of "mirror", "padding", "edge" or "wrap".
            nWorkers: Number of worker processes, or "auto" for
                `cpu_count() - 1` (at least 1).
            averageOutputWeights: If true a single weight matrix is kept,
                otherwise one per pixel. Only accepted with `solver="lsqr"`.
            solver: Stored as `self._solver`.
            regressionParameters: Stored as `self._regressionParameters`;
                `None` (the default) means a fresh empty dict.

        All remaining arguments are forwarded unchanged to the base class.

        Raises:
            ValueError: For an unsupported `borderMode`, an even `filterSize`,
                or `averageOutputWeights` combined with a solver other than
                "lsqr". NOTE: the defaults (`averageOutputWeights=True`,
                `solver='pinv'`) trigger the last case, so callers have to
                override one of the two.
        """

        self._averageOutputWeights = averageOutputWeights
        if averageOutputWeights and solver != "lsqr":
            raise ValueError(
                "`averageOutputWeights` can only be set to `True` when `solver` is set to `lsqr` (Ridge Regression)")

        self._borderMode = borderMode
        if borderMode not in ("mirror", "padding", "edge", "wrap"):
            raise ValueError(
                "`borderMode` must be set to one of the following values: `mirror`, `padding`, `edge` or `wrap`.")

        # A `{}` default would be one dict shared by every instance created
        # without this argument; build a fresh one instead.
        self._regressionParameters = {} if regressionParameters is None else regressionParameters
        self._solver = solver

        n_inputDimensions = len(inputShape)

        if filterSize % 2 == 0:
            raise ValueError("filterSize has to be an odd number (1, 3, 5, ...).")
        self._filterSize = filterSize
        self._filterWidth = int(np.floor(filterSize / 2))
        self._stride = stride

        self._n_input = int(np.power(np.ceil(filterSize / stride), n_inputDimensions))

        self.n_inputDimensions = n_inputDimensions
        self.inputShape = inputShape

        nPixels = np.prod(inputShape)
        nWeights = self._n_input + n_reservoir + 1

        if not self._averageOutputWeights:
            self._WOuts = B.empty((nPixels, 1, nWeights))
            self._WOut = None
        else:
            self._WOuts = None
            self._WOut = B.zeros((1, nWeights))
        self._xs = B.empty((nPixels, n_reservoir, 1))

        if nWorkers == "auto":
            # builtin max keeps the worker count a plain Python int
            self._nWorkers = max(cpu_count() - 1, 1)
        else:
            self._nWorkers = nWorkers

        manager = Manager()
        self.sharedNamespace = manager.Namespace()
        # `getattr` with a default: the queue attribute may not exist yet even
        # when `fitWorkerID` does, and a plain attribute read would raise.
        if not hasattr(self, "fitWorkerID") or getattr(self, "parallelWorkerIDs", None) is None:
            self.parallelWorkerIDs = manager.Queue()
            for workerID in range(self._nWorkers):
                self.parallelWorkerIDs.put(workerID)

        super(SpatioTemporalESN, self).__init__(n_input=self._n_input, n_reservoir=n_reservoir, n_output=1,
                                                spectralRadius=spectralRadius,
                                                noiseLevel=noiseLevel, inputScaling=inputScaling,
                                                leakingRate=leakingRate, reservoirDensity=reservoirDensity,
                                                randomSeed=randomSeed, out_activation=out_activation,
                                                out_inverse_activation=out_inverse_activation,
                                                weightGeneration=weightGeneration, bias=bias, outputBias=outputBias,
                                                outputInputScaling=outputInputScaling,
                                                inputDensity=inputDensity, activation=activation,
                                                activationDerivation=activationDerivation)

        """

Example #28

0

Show file

File: odin.py Project: wsygoogol/ODIN

def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, html, screenshots, graph, nuke, whoxy_limit):
    """
The OSINT toolkit:\n
This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the README for the full details and lists of API keys!

Note: If providing a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    # Implementation notes live in comments because the docstring above is
    # presumably rendered as the command's help text (TODO confirm):
    #   * The recon work runs as three sequential phases of child processes;
    #     every process of a phase is started, then all are joined, before
    #     the next phase begins.
    #   * `graph`/`nuke` control the optional Neo4j conversion and `html` the
    #     optional HTML report, both performed after the database is closed.
    click.clear()
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    # This command takes no verbose option, so the value is fixed; it is
    # still forwarded to the file-hunting job below.
    verbose = None

    def run_phase(phase_jobs):
        """Start every process of one phase, then wait for all of them."""
        for job in phase_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in phase_jobs:
            job.join()

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ != "__main__":
        # Processes are only spawned when this module is the entry point.
        return

    # Create manager server to handle variables shared between jobs
    manager = Manager()
    ip_list = manager.list()
    domain_list = manager.list()
    # Create reporter object and generate final list, the scope from scope file
    report = reporter.Reporter(report_path, output_report)
    report.create_tables()
    scope, ip_list, domain_list = report.prepare_scope(
        ip_list, domain_list, scope_file, domain)

    # Create some jobs and put Python to work!
    # Job queue 1 is for the initial phase
    jobs = [
        Process(name="Company Info Collector",
                target=report.create_company_info_table,
                args=(domain, )),
        Process(name="Employee Hunter",
                target=report.create_people_table,
                args=(domain_list, organization)),
        Process(name="Domain and IP Address Recon",
                target=report.create_domain_report_table,
                args=(organization, scope, ip_list, domain_list,
                      whoxy_limit)),
    ]
    # Job queue 2 is used for jobs using data from job queue 1
    more_jobs = [
        Process(name="Shodan Queries",
                target=report.create_shodan_table,
                args=(ip_list, domain_list)),
        Process(name="Domain Squatting Recon",
                target=report.create_urlcrazy_table,
                args=(organization, domain)),
    ]
    # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
    even_more_jobs = [
        Process(name="Cloud Recon",
                target=report.create_cloud_table,
                args=(organization, domain, aws, aws_fixes)),
    ]

    if screenshots:
        more_jobs.append(
            Process(name="Screenshot Snapper",
                    target=report.capture_web_snapshots,
                    args=(report_path, )))

    if files:
        more_jobs.append(
            Process(name="File Hunter",
                    target=report.create_foca_table,
                    args=(domain, ext, delete, report_path, verbose)))

    print(
        green(
            "[+] Beginning initial discovery phase! This could take some time..."
        ))
    run_phase(jobs)

    print(
        green(
            "[+] Initial discovery is complete! Proceeding with additional queries..."
        ))
    run_phase(more_jobs)

    print(green("[+] Final phase: checking the cloud and web services..."))
    run_phase(even_more_jobs)

    report.close_out_reporting()
    print(
        green(
            "[+] Job's done! Your results are in {} and can be viewed and queried with "
            "any SQLite browser.".format(output_report)))

    if graph:
        graph_reporter = grapher.Grapher(output_report)
        # Format the message before colouring it, like every other message.
        print(
            green(
                "[+] Loading ODIN database file {} for conversion to Neo4j"
                .format(output_report)))

        if nuke:
            confirm = input(
                red("\n[!] You set the --nuke option. This wipes out all nodes "
                    "for a fresh start. Proceed? (Y\\N) "))
            if confirm.lower() == "y":
                graph_reporter.clear_neo4j_database()
                print(green("[+] Database successfully wiped!\n"))
                graph_reporter.convert()
            else:
                print(
                    red("[!] Then you can convert your database to a graph database later. "
                        "Run lib/grapher.py with the appropriate options."))
        else:
            graph_reporter.convert()

    if html:
        print(
            green("\n[+] Creating the HTML report using {}.".format(
                output_report)))
        # `report_path` already ends with a slash.
        html_reporter = htmlreporter.HTMLReporter(
            organization, report_path + "html_report/", output_report)
        html_reporter.generate_full_report()

Example #29

0

Show file

def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, verbose, html, screenshots):
    """
The OSINT toolkit:\n
This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.\n
ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.\n
View the README for the full details and lists of API keys!
    """
    # Implementation notes live in comments because the docstring above is
    # presumably rendered as the command's help text (TODO confirm):
    #   * The recon work runs as three sequential phases of child processes;
    #     every process of a phase is started, then all are joined, before
    #     the next phase begins.
    #   * `html` requests the optional HTML report after the database is
    #     closed.
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information "
                "is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    def run_phase(phase_jobs):
        """Start every process of one phase, then wait for all of them."""
        for job in phase_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in phase_jobs:
            job.join()

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ != "__main__":
        # Processes are only spawned when this module is the entry point.
        return

    # Create manager server to handle variables shared between jobs
    manager = Manager()
    ip_list = manager.list()
    domain_list = manager.list()
    # Create reporter object and generate final list, the scope from scope file
    report = reporter.Reporter(output_report)
    report.create_tables()
    scope, ip_list, domain_list = report.prepare_scope(
        ip_list, domain_list, scope_file, domain)

    # Create some jobs and put Python to work!
    # Job queue 1 is for the initial phase
    jobs = [
        Process(name="Company Info Collector",
                target=report.create_company_info_table,
                args=(domain, )),
        Process(name="Employee Hunter",
                target=report.create_people_table,
                args=(domain, organization)),
        Process(name="Domain and IP Address Recon",
                target=report.create_domain_report_table,
                args=(scope, ip_list, domain_list, verbose)),
    ]
    # Job queue 2 is used for jobs using data from job queue 1
    more_jobs = [
        Process(name="Shodan Queries",
                target=report.create_shodan_table,
                args=(ip_list, domain_list)),
        Process(name="Domain Squatting Recon",
                target=report.create_urlcrazy_table,
                args=(organization, domain)),
    ]
    # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
    even_more_jobs = [
        Process(name="Cloud Recon",
                target=report.create_cloud_table,
                args=(organization, domain, aws, aws_fixes)),
    ]

    if screenshots:
        more_jobs.append(
            Process(name="Screenshot Snapper",
                    target=report.capture_web_snapshots,
                    args=(report_path, )))

    if files:
        # In this version the file hunter runs in the initial phase.
        jobs.append(
            Process(name="File Hunter",
                    target=report.create_foca_table,
                    args=(domain, ext, delete, report_path, verbose)))

    print(
        green(
            "[+] Beginning initial discovery phase! This could take some time..."
        ))
    run_phase(jobs)

    print(
        green(
            "[+] Initial discovery is complete! Proceeding with additional queries..."
        ))
    run_phase(more_jobs)

    print(green("[+] Final phase: checking the cloud and web services..."))
    run_phase(even_more_jobs)

    report.close_out_reporting()
    print(
        green("[+] Job's done! Your results are in {}.".format(
            output_report)))

    if html:
        # `report_path` already ends with a slash.
        html_reporter = htmlreporter.HTMLReporter(
            organization, report_path + "html_report/", output_report)
        html_reporter.generate_full_report()

Example #30

0

Show file

File: batch.py Project: kathefter/yass

    def _multi_channel_apply_disk_parallel(self, function, cleanup_function,
                                           output_path, from_time, to_time,
                                           channels, cast_dtype,
                                           pass_batch_info, pass_batch_results,
                                           processes, **kwargs):
        """Run `function` on every batch in a process pool, writing one file.

        Batches are computed concurrently, but their results are appended to
        `output_path` strictly in batch order: a manager-held counter names
        the batch whose turn it is, and each worker waits until the counter
        equals its own batch index before writing.

        Args:
            function, cleanup_function, cast_dtype, pass_batch_info, kwargs:
                Forwarded to `util.batch_runner` for every batch.
            output_path: Destination file; converted to `Path`.
            from_time, to_time, channels: Forwarded to `self.multi_channel`
                and `self.indexer.n_batches` to select the data.
            pass_batch_results: Not supported in this mode.
            processes: Passed to `Pool` as the number of worker processes.

        Returns:
            Tuple `(output_path, params)` where `params` is the value returned
            by `util.make_metadata`.

        Raises:
            NotImplementedError: If `pass_batch_results` is truthy.
            Exception: The first exception reported by a worker.
        """

        self.logger.debug('Starting parallel operation...')

        if pass_batch_results:
            raise NotImplementedError("pass_batch_results is not "
                                      "implemented on 'disk' mode")

        # need to convert to a list, otherwise cannot be pickled
        data = list(
            self.multi_channel(from_time, to_time, channels,
                               return_data=False))
        n_batches = self.indexer.n_batches(from_time, to_time, channels)

        self.logger.info('Data will be splitted in %s batches', n_batches)

        output_path = Path(output_path)

        # create local variables to avoid pickling problems
        _path_to_recordings = copy(self.path_to_recordings)
        _dtype = copy(self.dtype)
        _n_channels = copy(self.n_channels)
        _data_order = copy(self.data_order)
        _loader = copy(self.loader)
        _buffer_size = copy(self.buffer_size)

        reader = partial(RecordingsReader,
                         path_to_recordings=_path_to_recordings,
                         dtype=_dtype,
                         n_channels=_n_channels,
                         data_order=_data_order,
                         loader=_loader,
                         return_data_index=True)

        m = Manager()
        mapping = m.dict()
        next_to_write = m.Value('i', 0)
        # Set by any worker that fails, so that workers waiting for their
        # turn (and the progress loop) stop instead of waiting forever.
        failed = m.Event()
        # Pause between two polls of the shared counter, in seconds; polling
        # without a pause keeps a core and the manager process busy.
        poll_interval = 0.01

        def parallel_runner(element):
            i, _ = element

            try:
                res = util.batch_runner(element,
                                        function,
                                        reader,
                                        pass_batch_info,
                                        cast_dtype,
                                        kwargs,
                                        cleanup_function,
                                        _buffer_size,
                                        save_chunks=False,
                                        output_path=output_path)

                if i == 0:
                    mapping['dtype'] = str(res.dtype)

                # wait for this batch's turn; `wait` doubles as the pause
                while next_to_write.value != i:
                    if failed.wait(poll_interval):
                        raise RuntimeError('Batch {} aborted: another batch '
                                           'failed'.format(i))

                with open(str(output_path), 'wb' if i == 0 else 'ab') as f:
                    res.tofile(f)

                next_to_write.value += 1
            except BaseException:
                failed.set()
                raise

        # run jobs
        self.logger.debug('Creating processes pool...')

        pool = Pool(processes)
        try:
            async_result = pool.map_async(parallel_runner, enumerate(data))

            if self.show_progress_bar:
                pbar = tqdm(total=n_batches)
                try:
                    finished = 0

                    while True:
                        # sample readiness first so that the counter read
                        # below is final whenever `done` is true
                        done = async_result.ready()
                        written = next_to_write.value

                        if written > finished:
                            pbar.update(written - finished)
                            finished = written

                        if done or written == n_batches:
                            break

                        if failed.wait(poll_interval):
                            break
                finally:
                    pbar.close()

            # wait for the workers and re-raise the first worker exception
            async_result.get()
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

        # save metadata
        params = util.make_metadata(channels, self.n_channels,
                                    mapping['dtype'], output_path)

        return output_path, params