Example #1
def spin_crawl_threads(state, classifiers, MAX_BIT_SIZE, MAX_DL_THREADS, image_path):
    print("Running threads...")
    manager = Manager()

    location_q = manager.Queue(maxsize=16)
    image_q = manager.Queue(maxsize=64)
    state_lock = manager.Lock()

    generate_location = Process(target=generate_location_thread,
                                args=(location_q, MAX_BIT_SIZE),
                                name="generate_location")
    classification = Process(target=classification_thread,
                             args=(image_q, classifiers, image_path,
                                   state, state_lock), name="classification")
    download_image_t = Process(target=download_image_thread,
                               args=(location_q, image_q, MAX_DL_THREADS),
                               name="download_image")

    download_image_t.start()
    classification.start()
    generate_location.start()

    def kill_threads():
        for thread in active_children():
            thread.terminate()

    atexit.register(kill_threads)

    download_image_t.join()
    classification.join()
    generate_location.join()
Example #2
    def fit(self, X, Y):
        assert not self.fit_done
        assert len(X) == len(Y)

        possible_labels = list(set(y_val for y in Y for y_val in y))
        job_labels = np.array_split(possible_labels, self.n_jobs)

        with Manager() as manager:
            X_proxy = manager.list(X)
            Y_proxy = manager.list(Y)
            output_queue = Queue()
            processes = [
                Process(target=sequential_execute,
                        args=(output_queue, get_binary_clf_from_multilabel,
                              [{'X': X_proxy,
                                'Y': Y_proxy,
                                'label': lbl,
                                'return_label': True} for lbl in job]))
                for job in job_labels
            ]
            [p.start() for p in processes]
            results = [output_queue.get()
                       for lbl in possible_labels]  # needs to be flattened
            [p.join() for p in processes]

        self.classifiers = dict(results)
        self.fit_done = True
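The fit() above shares the training data with its workers through Manager.list proxies and collects one (label, classifier) pair per label from a queue. Below is a condensed, runnable sketch of that fan-out/gather shape, with a dummy worker standing in for sequential_execute and get_binary_clf_from_multilabel (the worker body and toy data are assumptions, not the project's code):

from multiprocessing import Manager, Process, Queue

import numpy as np


def train_labels(out_q, X, Y, labels):
    # Dummy stand-in: emit one (label, "classifier") pair per assigned label.
    for lbl in labels:
        out_q.put((lbl, {"label": str(lbl), "positives": sum(lbl in y for y in Y)}))


if __name__ == "__main__":
    X = [[0], [1], [2], [3]]
    Y = [["a"], ["a", "b"], ["b"], ["a"]]
    possible_labels = sorted({lbl for y in Y for lbl in y})
    with Manager() as manager:
        X_proxy, Y_proxy = manager.list(X), manager.list(Y)
        out_q = Queue()
        processes = [Process(target=train_labels,
                             args=(out_q, X_proxy, Y_proxy, list(job)))
                     for job in np.array_split(possible_labels, 2)]
        for p in processes:
            p.start()
        # drain the queue before join() so the children can flush their pipes
        classifiers = dict(out_q.get() for _ in possible_labels)
        for p in processes:
            p.join()
    print(classifiers)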
Example #3
def main():
    """
    main
    """
    file_to_attack = './data/example_files/S_hecht_submission_3.csv'
    method_order = 'param'
    nb_element = 1
    month_spliter(file_to_attack)
    manager = Manager()
    queue_list = [manager.Queue(1) for _ in range(13)]
    with Pool(6) as p:
        print("HEY")
        # Pool.map's third positional argument is chunksize, so the queues have
        # to be zipped with the indices and dispatched via starmap (assuming
        # maker takes an (index, queue) pair).
        p.starmap(maker, zip(range(13), queue_list))
    for queue in queue_list:
        GUESS_PART.append(queue.get())
    GUESS_PART.sort()
    for i in range(NB_MONTH):
        char = "guess_par_t" + str(i) + ".json"
        with open(char, "w") as jsdump:
            json.dump(GUESS_PART[i][1], jsdump, indent=4)
    write_csv(GUESS_PART[0][1], GUESS_PART[1][1], GUESS_PART[2][1],
              GUESS_PART[3][1], GUESS_PART[4][1], GUESS_PART[5][1],
              GUESS_PART[6][1], GUESS_PART[7][1], GUESS_PART[8][1],
              GUESS_PART[9][1], GUESS_PART[10][1], GUESS_PART[11][1],
              GUESS_PART[12][1])
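One thing the example above relies on: queues created with Manager().Queue() are proxy objects, so they can be pickled and handed to pool workers as arguments, whereas a plain multiprocessing.Queue passed the same way raises a RuntimeError. A minimal sketch of that dispatch, assuming a maker-style worker that takes (index, queue):

from multiprocessing import Manager, Pool


def worker(index, queue):
    # Each worker reports its result through its manager-backed queue proxy.
    queue.put((index, index * index))


if __name__ == "__main__":
    manager = Manager()
    queues = [manager.Queue(1) for _ in range(4)]
    with Pool(2) as pool:
        pool.starmap(worker, zip(range(4), queues))
    print(sorted(q.get() for q in queues))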
Example #4
    def add_progress_counter(self, init_mess="Beginning", end_mess="Done",
                             name_things='things', perc_interv=5):
        self.PG = ProgressCounter(init_mess=init_mess, end_mess=end_mess,
                                  nmb_max=len(self.data),
                                  name_things=name_things,
                                  perc_interv=perc_interv)
        self.manager = Manager()
        self.manager.register("PG", self.PG)
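For comparison, the documented way to expose a custom class through a manager is to register it on a BaseManager subclass before the manager is started and then build proxies from the started manager; calling register() on an already-started Manager(), as the snippet does, has no effect on the server process that is already running. A minimal sketch with a simplified ProgressCounter standing in for the project's class (its constructor arguments here are assumptions):

from multiprocessing.managers import BaseManager


class ProgressCounter:
    def __init__(self, nmb_max, name_things="things"):
        self.nmb_max = nmb_max
        self.name_things = name_things
        self.count = 0

    def tick(self):
        self.count += 1
        return self.count


class PGManager(BaseManager):
    pass


# register() must happen before start() so the server process knows the typeid
PGManager.register("ProgressCounter", ProgressCounter)

if __name__ == "__main__":
    manager = PGManager()
    manager.start()
    pg = manager.ProgressCounter(nmb_max=10)  # a proxy usable from child processes
    print(pg.tick())
    manager.shutdown()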
Example #5
def ospf_check():
    clear_log()
    devices = [x.split(',')[0] for x in open(devicesFile)]
    pool = Pool(processor)
    lock = Manager().Lock()
    list(pool.map(partial(_inf_ospf_check, lock), devices))
    pool.close()
    pool.join()
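The Manager().Lock() plus functools.partial idiom above recurs throughout the pool-based examples below: a manager lock is a picklable proxy, so it can be baked into the worker with partial and shipped to Pool.map, which a raw multiprocessing.Lock would not survive. A self-contained sketch (check_device and its log file are placeholders, not the _inf_ospf_check code):

from functools import partial
from multiprocessing import Manager, Pool


def check_device(lock, device):
    # The lock proxy serialises writes to the shared log file.
    with lock:
        with open("check.log", "a") as log:
            log.write("checked {}\n".format(device))


if __name__ == "__main__":
    devices = ["10.0.0.1", "10.0.0.2", "10.0.0.3"]
    lock = Manager().Lock()
    with Pool(2) as pool:
        pool.map(partial(check_device, lock), devices)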
Example #6
def svlan_check():
    clear_log()
    #  nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='company', property_value='zte')
    olts = [(x['ip'], x['company'], x['area']) for x in nodes]
    #  list(map(compose(card_entry, get_card), olts))
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, get_svlan), olts))
    pool.close()
    pool.join()
Example #7
File: switch.py Project: sjava/olt
def interface_check_m():
    clear_log()
    #  cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    cmd = "match(s: Switch)  return s.ip, s.model"
    #  cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    out_inf = partial(output_interface_m, lock)
    list(pool.map(compose(out_inf, get_interface), switchs))
    pool.close()
    pool.join()
Example #8
def zte_gpon_svlan_check():
    clear_log()
    nodes = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)"
    )
    olts = ((x[0], x[1]) for x in nodes)
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])
    pool = Pool(8)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, lzte_gpon_svlan), olts))
    pool.close()
    pool.join()
Example #9
    def run_parallel(self,
                     test_suites,
                     test_runner,
                     result_type=None,
                     results_path=None):

        exit_code = 0
        proc = None
        unittest.installHandler()
        processes = []
        manager = Manager()
        results = manager.dict()
        manager.dict()
        start = time.time()

        test_mapping = {}
        for test_suite in test_suites:
            # Give each test suite an uuid so it can be
            # matched to the correct test result
            test_id = str(uuid.uuid4())
            test_mapping[test_id] = test_suite

            proc = Process(target=self.execute_test,
                           args=(test_runner, test_id, test_suite, results))
            processes.append(proc)
            proc.start()

        for proc in processes:
            proc.join()

        finish = time.time()

        errors, failures, _ = self.dump_results(start, finish, results)

        if result_type is not None:
            all_results = []
            for test_id, result in list(results.items()):
                tests = test_mapping[test_id]
                result_parser = SummarizeResults(vars(result), tests,
                                                 (finish - start))
                all_results += result_parser.gather_results()

            reporter = Reporter(result_parser=result_parser,
                                all_results=all_results)
            reporter.generate_report(result_type=result_type,
                                     path=results_path)

        if failures or errors:
            exit_code = 1

        return exit_code
Example #10
def query(query_lst):

    manager = Manager()
    hits = manager.dict()

    results = []

    for q in query_lst:
        r = requests.get('http://dblp.uni-trier.de/search/publ/api',
                         params={
                             'q': q,
                             'h': 100,
                             'format': 'json'
                         })

        if r.status_code == 429:
            raise Error

        json_answer = r.json()

        res = json_answer["result"]["hits"].get("hit", None)

        if res is None:
            continue

        results += res

    def f(d, hit, n):

        if hit is None:
            return

        authors = hit["info"].pop("authors")
        if isinstance(authors["author"], dict):
            hit["info"]["authors"] = authors["author"]["text"]
        else:
            hit["info"]["authors"] = [
                fullname(a["text"]) for a in authors["author"]
            ]

        hit["info"]["bibtex"] = get_bib(hit["info"]["key"])
        d[n] = hit["info"]

    job = [
        Process(target=f, args=(hits, hit, n)) for n, hit in enumerate(results)
    ]
    _ = [p.start() for p in job]
    _ = [p.join() for p in job]

    return dict(hits)
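query() above starts one Process per hit, which can mean hundreds of short-lived processes for a large result set. Because the Manager dict proxy is itself picklable, the same keyed gather also works with a bounded Pool; a minimal sketch (fetch_info is a placeholder for the per-hit enrichment, not the DBLP code):

from multiprocessing import Manager, Pool


def fetch_info(shared, n, hit):
    # Stand-in for the per-hit work above; results are keyed by position.
    shared[n] = {"title": hit.get("title", ""), "size": len(str(hit))}


if __name__ == "__main__":
    results = [{"title": "A"}, {"title": "B"}, {"title": "C"}]
    with Manager() as manager:
        hits = manager.dict()
        with Pool(2) as pool:
            pool.starmap(fetch_info,
                         [(hits, n, hit) for n, hit in enumerate(results)])
        print(dict(hits))  # copy out before the manager shuts down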
Example #11
    def get_city_states(self):
        """
        Creates city states from start time to end time
        :param:
        :return:
        """
        city_states = []
        start_time = self.start_time
        end_time = self.end_time

        # Create array of time slice values between the start and end time
        business_days = self.config['city_state_creator']['business_days']
        business_hours_start = self.config['city_state_creator'][
            'business_hours_start']
        business_hours_end = self.config['city_state_creator'][
            'business_hours_end']
        index = pd.date_range(start=start_time,
                              end=end_time,
                              freq=str(self.time_unit_duration) + 'min')

        # Filter only the required days and hours
        index = index[index.day_name().isin(business_days)]
        index = index[(index.hour >= business_hours_start)
                      & (index.hour <= business_hours_end)]
        time_slice_starts = index - timedelta(
            minutes=self.time_slice_duration / 2)
        time_slice_ends = index + timedelta(minutes=self.time_slice_duration /
                                            2)

        # Create arguments dictionary for parallelization
        self.parallel_args = self.create_parallel_args(index,
                                                       time_slice_starts,
                                                       time_slice_ends)

        # Create city states
        manager = Manager()
        city_states = manager.dict()
        N = len(index.values)

        # Create parallel pool
        self.logger.info("Creating parallelization pool")
        pool = ProcessPool(nodes=25)
        pool.map(self.get_city_state, ([city_states, t] for t in xrange(N)))
        pool.close()
        pool.join()
        pool.clear()
        self.logger.info("Finished creating city states")

        return dict(city_states)
Example #12
def hostname_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()
    ip_hostname = (x.split(',') for x in open(result_file))
    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    list(
        map(lambda x: graph.cypher.execute(cmd, ip=x[0], hostname=x[1]),
            ip_hostname))
Example #13
    def create_csv(self):
        if __name__ == '__main__':
            t1 = time()
            file1 = open(self.out_csv1, "w")
            file1.write("id" + ',' + "level" + '\n')
            file2 = open(self.out_csv2, "w")
            file2.write("id" + ',' + "object_name" + '\n')
            file1.close()
            file2.close()

            i = range(len(self.list_of_zips))
            p = Pool()
            m = Manager()
            l = m.Lock()
            func = partial(self.parse_Zip, l)
            p.map(func, i)
            p.close()
            p.join()
            # keep the timing report inside the __main__ guard so t1 is defined
            print('Create .csv files time = ' + str(time() - t1) + 's')
Example #14
def zhongji_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()
    ports = (x.split(',') for x in open(result_file))
    cmd = """match(n: Olt) where n.ip = {ip} 
    merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) 
    merge(m) - [:Include]->(p: Port{name: {interface}})"""
    list(
        map(
            lambda x: graph.cypher.execute(
                cmd, ip=x[0], sm=x[1], interface=x[2]), ports))
Example #15
def run_post_process():
    es = ES(FLAGS.configfile_name)
    manager = Manager()
    lock = manager.Lock()
    shared_dict = manager.dict({'time': 0, "id": ""})
    process_num = int(cpu_count() - 2)

    generator_list = []
    for i in range(process_num):
        generator_list.append(_generator(lock, shared_dict, es))

    #%%
    p = []
    for i in range(process_num):
        p.append(Process(target=_process_unknown_record,
                         args=(generator_list[i],)))
        p[i].start()

    for q in p:
        q.join()
Example #16
def download_image_thread(location_q, image_q, MAX_DL_THREADS=10):
    print("Running Download Image Thread.")

    max_processes = MAX_DL_THREADS
    print("Creating a thread pool of size {} for downloading images...".format(max_processes))
    pool = Pool(processes=max_processes)
    # Allow us to have n processes running, and n processes scheduled to run.
    # TODO: Manager is not necessary here, but is used to get around the fact
    # that thread-safe objects cannot be passed by reference; they must be
    # inherited. A more lightweight solution should be found.
    workers = Manager().Semaphore(max_processes*2)

    def async_download(location):
        image = download_image(location)
        image_q.put((location, image), True)
        workers.release()

    while True:
        location = location_q.get(True)
        workers.acquire()
        pool.apply_async(async_download, (location,))
Example #17
def folderbase_cut_silence(input_folder, cut_interval):

    output_no_silence = os.path.join(input_folder, "remove_silence")
    # if not os.path.exists(output_folder):
    # 	os.mkdir(output_folder)
    if not os.path.exists(output_no_silence):
        os.mkdir(output_no_silence)
    wav_files = []
    for root, dirs, files in os.walk(input_folder):
        for filename in files:
            wav_files.append(filename)

    def process_files(lock, file):
        try:
            #exclude log.txt file
            if re.search(".+\.wav", file):
                wave_file = os.path.join(input_folder, file)
                wo_num = cut_wav_without_silence(wave_file, output_no_silence,
                                                 cut_interval)
                with cut_silence_file_num.get_lock():
                    cut_silence_file_num.value += 1
                with cut_silence_out_file_num.get_lock():
                    cut_silence_out_file_num.value += wo_num
                os.remove(wave_file)

        except Exception as e:
            logging.info(e)
            with cut_silence_fail_file.get_lock():
                cut_silence_fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    # Pool.map's third argument is chunksize, so pair the shared lock with each
    # file and dispatch via starmap (note: a process Pool also requires
    # process_files to be picklable).
    pool.starmap(process_files, zip(locks, wav_files))
    loginfo = '''Total number of audio files processed is {}, generated {} files and {} files failed
		'''.format(cut_silence_file_num.value, cut_silence_out_file_num.value,
             cut_silence_fail_file.value)
    logging.info(loginfo)
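The counters used above (cut_silence_file_num and friends) are module-level objects not shown in the snippet; their get_lock()/value usage suggests they are multiprocessing.Value instances. With fork they are simply inherited, but the portable way to hand such counters to pool workers is a Pool initializer. A small sketch with hypothetical names:

from multiprocessing import Pool, Value

processed = None  # populated in each worker by _init_counters


def _init_counters(counter):
    # Runs once per worker process; stashes the shared counter in a global.
    global processed
    processed = counter


def handle_file(path):
    with processed.get_lock():
        processed.value += 1
    return path


if __name__ == "__main__":
    counter = Value("i", 0)
    files = ["a.wav", "b.wav", "c.wav"]
    with Pool(2, initializer=_init_counters, initargs=(counter,)) as pool:
        pool.map(handle_file, files)
    print(counter.value)  # 3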
Example #18
    def __init__(self,
                 initial_patterns_list,
                 input_text_file,
                 data_source_type,
                 read_file_format='rb'):
        self.read_format = read_file_format
        self.pattern_to_data = {}  #defaultdict(PatternData)

        patterns_data = [
            PatternData(pattern) for pattern in initial_patterns_list
        ]
        self.pattern_to_data = Manager().dict(
            {pattern.formatted: pattern
             for pattern in patterns_data})

        patterns_len = [
            len(pattern.split()) for pattern in self.pattern_to_data.keys()
        ]
        self.min_pattern_len = min(patterns_len)
        self.max_pattern_len = max(patterns_len)
        self.data_wrapper = data_wrapper_factory(input_text_file,
                                                 data_source_type)
Example #19
def add_power_info():
    funcs = {
        'S8508': S85.get_power_info,
        'S8505': S85.get_power_info,
        'T64G': T64.get_power_info,
        'S8905': S89.get_power_info,
        'S8905E': S8905E.get_power_info,
        'S9306': S93.get_power_info,
        'S9303': S93.get_power_info
    }
    get_power_info = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model"
    )
    switches = [dict(ip=x['ip'], model=x['model']) for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_power_info, lock)
    list(pool.map(compose(_ff, get_power_info), switches))
    pool.close()
    pool.join()
Example #20
    def sample(self, n_samples: int, beta: float = 1.):
        with Manager() as mgr:
            queues_work = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]
            queues_return = [mgr.Queue(maxsize=1) for _ in range(self.num_chains)]

            _ = self.parallel_pool.starmap(
                func=worker_init,
                iterable=[(queues_work[idx], queues_return[idx], idx, self.samplers[idx])
                          for idx in range(self.num_chains)])

            worker_results = [self.parallel_pool.apply_async(func=worker_run) for _ in range(self.num_chains)]

            swapped = [None for _ in self.samplers]
            last_samples = [None for _ in self.samplers]
            for i_sample in tqdm(range(int(n_samples))):
                logger.debug('MAIN PROCESS: deploying work...')
                for idx, beta in enumerate(self.betas):
                    queues_work[idx].put((idx, copy.deepcopy(swapped[idx]), beta, False))  # sample
                logger.debug('MAIN PROCESS: waiting for return...')
                for idx in range(len(self.samplers)):
                    idx, last_sample, beta = queues_return[idx].get()  # get sample
                    last_samples[idx] = last_sample
                logger.debug('MAIN PROCESS: swapping samples...')
                swapped = self.swap_samples(last_samples)  # swap samples
                logger.debug('MAIN PROCESS: swapping samples...')
                self.adjust_betas(i_sample, swapped, last_samples)  # adjust temps
            # logger.debug('stopping workers...')
            _ = [queues_work[idx].put((idx, None, 0.00, True)) for idx in range(self.num_chains)]
            _ = [queues_work[idx].join() for idx in range(self.num_chains)]
            # logger.debug('reached getting from finalqueue')
            for worker_result in worker_results:
                idx, sampler_obj = worker_result.get()
                logger.debug(f'GATHERED sampler {idx} trace_x: {len(sampler_obj.trace_x)}')
                self.samplers[idx] = sampler_obj

            self.parallel_pool.close()
            self.parallel_pool.join()
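worker_init and worker_run above are the project's pool-side functions and are not shown. The queues_work[idx].join() near the end only returns once the worker has called task_done() for every item it took, so the worker loop has roughly the shape sketched below; this is a generic stand-alone illustration, not the project's implementation:

from multiprocessing import Manager, Pool


def worker_loop(queue_work, queue_return):
    # Pull (idx, payload, beta, stop) tuples until told to stop; every get()
    # is matched with a task_done() so the producer's join() can return.
    while True:
        idx, payload, beta, stop = queue_work.get()
        try:
            if stop:
                return idx
            queue_return.put((idx, payload, beta))
        finally:
            queue_work.task_done()


if __name__ == "__main__":
    with Manager() as mgr, Pool(2) as pool:
        work = [mgr.Queue(maxsize=1) for _ in range(2)]
        ret = [mgr.Queue(maxsize=1) for _ in range(2)]
        handles = [pool.apply_async(worker_loop, (work[i], ret[i]))
                   for i in range(2)]
        for i in range(2):
            work[i].put((i, {"x": i}, 1.0, False))
        print([ret[i].get() for i in range(2)])
        for i in range(2):
            work[i].put((i, None, 0.0, True))  # stop signal
        for i in range(2):
            work[i].join()                     # returns thanks to task_done()
        print([h.get() for h in handles])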
Example #21
def add_traffics():
    funcs = {
        'S8508': S85.get_traffics,
        'S8505': S85.get_traffics,
        'T64G': T64.get_traffics,
        'S8905': S89.get_traffics,
        'S8905E': S8905E.get_traffics,
        'S9306': S93.get_traffics,
        'S9303': S93.get_traffics
    }
    get_traffics = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model"
    )
    switchs = [
        dict(ip=x['ip'], infs=x['infs'], model=x['model']) for x in nodes
    ]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_traffics, lock)
    list(pool.map(compose(_ff, get_traffics), switchs))
    pool.close()
    pool.join()
Example #22
def folderbase_convert_to_wave(webmfolder, wavefolder):
    def process_convert(lock, filename):
        my_logger.debug("filename is {}".format(filename))
        with total_num.get_lock():
            total_num.value += 1
        try:
            success = convert_to_wav(filename, wavefolder)
            with success_num.get_lock():
                success_num.value += success
            os.remove(filename)
        except Exception as e:
            line = "\t".join([str(datetime.datetime.now()), filename, str(e)])
            my_logger.info(line)
            fail_folder = "data/convert_failed"
            if not os.path.exists(fail_folder):
                os.mkdir(fail_folder)
            filebase = os.path.basename(filename)
            failed_file = os.path.join(fail_folder, filebase)
            os.rename(filename, failed_file)
            with fail_num.get_lock():
                fail_num.value += 1
        return 1

    filenames = []
    for file in mp3gen(webmfolder):
        if re.search("wav", file): continue
        filenames.append(file)
    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(filenames)
    # Pool.map's third argument is chunksize; pair each filename with the
    # shared lock and dispatch the pairs via starmap.
    pool.starmap(process_convert, zip(locks, filenames))

    my_logger.info(
        "{}/{} files successfully converted to wave and {} files failed".
        format(success_num.value, total_num.value, fail_num.value))
Example #23
def folderbase_cut_interval(input_folder, output_folder, cut_period):
    wav_files = []
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    for root, dirs, files in os.walk(input_folder):
        for filename in files:
            wav_files.append(os.path.join(root, filename))


#	for file in wav_files:

    def process_files(lock, file):
        try:
            if re.search(".+\.wav", file):
                with file_num.get_lock():
                    file_num.value += 1
                filebasename = os.path.basename(file)
                filebasename, _ = os.path.splitext(filebasename)
                #get audio properties
                audio_prop = {}
                with wave.open(file, mode='rb') as newAudio:
                    audio_prop["nchannels"] = newAudio.getnchannels()
                    audio_prop["nframes"] = newAudio.getnframes()
                    audio_prop["sampwidth"] = newAudio.getsampwidth()
                    audio_prop["framerate"] = newAudio.getframerate()
                    audio_prop["comptype"] = newAudio.getcomptype()
                    audio_prop["compname"] = newAudio.getcompname()
                audio_duration = audio_prop["nframes"] / audio_prop["framerate"]

                precut_duration = cut_period
                cut_start = 0
                cut_return = 0
                cut_num = 0
                index = 0
                while cut_start < audio_duration:
                    cut_end = cut_start + precut_duration
                    cut_audio, cutaudio_prop = cut_wave(file,
                                                        cut_start,
                                                        cut_end,
                                                        start_bias=0,
                                                        end_bias=0)
                    newfile = os.path.join(
                        output_folder,
                        filebasename + "_" + str(index) + ".wav")
                    index += 1
                    with wave.open(newfile, "wb") as newAudio:
                        newAudio.setparams((cutaudio_prop["nchannels"],
                                            cutaudio_prop["sampwidth"],
                                            cutaudio_prop["framerate"],
                                            cutaudio_prop["nframes"],
                                            cutaudio_prop["comptype"],
                                            cutaudio_prop["compname"]))
                        newAudio.writeframes(cut_audio)
                    cut_start = cut_start + precut_duration
                    with out_file_num.get_lock():
                        out_file_num.value += 1
                os.remove(file)
        except Exception as e:
            logging.info(e)
            with fail_file.get_lock():
                fail_file.value += 1

    pool = Pool(process_num)
    m = Manager()
    lock = m.Lock()
    locks = [lock] * len(wav_files)
    # Pool.map's third argument is chunksize; pair each file with the shared
    # lock and dispatch the pairs via starmap.
    pool.starmap(process_files, zip(locks, wav_files))
    loginfo = '''Total number of audio files processed is {}, generated {} files and {} files failed
	'''.format(file_num.value, out_file_num.value, fail_file.value)
    logging.info(loginfo)
Example #24
# Settings: Location, Units, and rapidfire (optional)
latitude = check_env_var("HZN_LAT", printerr=True)
longitude = check_env_var("HZN_LON", printerr=True)
pws_units = check_env_var("PWS_UNITS", default='us', printerr=True)    # weewx recommends only using 'us'
pws_wu_loc = check_env_var("PWS_WU_LOC", default='', printerr=True)
pws_wu_rapidfire = check_env_var("PWS_WU_RPDF", default='False', printerr=True)

# Deal with a potential lower-case (boolean value from Horizon) or erroneous value
if pws_wu_rapidfire == "true" or pws_wu_rapidfire == "True": 
    pws_wu_rapidfire = "True"
else: 
    pws_wu_rapidfire = "False"


## Shared data structure (dict for flask server to read & serve)
manager = Manager()
sdata = manager.dict()
standard_params = ["wu_id", "stationtype", "model", "latitude", "longitude", "units", "location"]
standard_values = [pws_wu_id, pws_station_type, pws_model, latitude, longitude, pws_units, pws_wu_loc]
sdata["r"] = dict(zip(["status"], ["Station initializing..."]))
sdata["t"] = str(int(time.time()))                                      # Timestamp
sdata["i"] = dict(zip(standard_params, standard_values))                # Station Info

## Flask HTTPserver ----------------------------------------------------------
## Start simple flask server at localhost:port and pass in shared data dict
p_flask = Process(target=fl.run_server, args=('0.0.0.0', 8357, sdata))
p_flask.start()

## Weewx service -------------------------------------------------------------
# Modify the weewx configuration file with our env var settings
weemod = weewx_mod(weewx_config_file, pws_station_type)
Example #25
    def predict(self, inputData, transientTime=0, update_processor=lambda x: x, verbose=0):
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        manager = Manager()
        predictQueue = manager.Queue()

        # workaround as predict does not support batches atm
        # add dummy dimension to let embedInputData work properly (is optimized to work for batches)
        inputData = inputData.reshape(1, *inputData.shape)
        modifiedInputData = self._embedInputData(inputData)
        modifiedInputData = modifiedInputData[0]
        inputData = inputData[0]

        self.transientTime = transientTime
        self.sharedNamespace.transientTime = transientTime
        predictionOutput = B.zeros(np.insert(self.inputShape, 0, inputData.shape[0] - transientTime))

        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[1:]]),
                        axis=rank).reshape(-1, rank).tolist()
        nJobs = len(jobs)

        self.resetState()

        iterator = PredictionArrayIterator(modifiedInputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_predictProcess,
                    initargs=[predictQueue, self])
        pool.map_async(self._predictProcess, iterator, chunksize=200)#, chunksize=1)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            while nJobsDone < nJobs:
                data = predictQueue.get()
                # result of predicting
                indices, prediction, state = data
                id = self._uniqueIDFromIndices(indices)
                self._xs[id] = state
                # update the values
                predictionOutput[tuple([Ellipsis] + indices)] = prediction

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        pool.close()

        return predictionOutput
Example #26
    def fit(self, inputData, outputData, transientTime=0, verbose=0):
        rank = len(inputData.shape) - 1

        if rank != self.n_inputDimensions and rank != self.n_inputDimensions + 1:
            raise ValueError(
                "The `inputData` does not have a suitable shape. It has to have {0} spatial dimensions and 1 temporal dimension.".format(
                    self.n_inputDimensions))

        # reshape the input so that it has the shape (timeseries, time, input_dimension^n)
        if rank == self.n_inputDimensions:
            inputData = inputData.reshape(1, *inputData.shape)
            outputData = outputData.reshape(1, *outputData.shape)
        else:
            # modify rank again
            rank -= 1

        partialLength = (inputData.shape[1] - transientTime)
        totalLength = inputData.shape[0] * partialLength
        timeseriesCount = inputData.shape[0]

        manager = Manager()
        fitQueue = manager.Queue()

        modifiedInputData = self._embedInputData(inputData)

        self.sharedNamespace.transientTime = transientTime

        self.sharedNamespace.partialLength = partialLength
        self.sharedNamespace.totalLength = totalLength
        self.sharedNamespace.timeseriesCount = timeseriesCount

        jobs = np.stack(np.meshgrid(*[np.arange(x) + self._filterWidth for x in inputData.shape[2:]]),
                        axis=rank).reshape(-1, rank).tolist()

        nJobs = len(jobs)

        self.resetState()

        iterator = FittingArrayIterator(modifiedInputData, outputData, jobs, self._filterWidth, self._stride, self)

        pool = Pool(processes=self._nWorkers, initializer=SpatioTemporalESN._init_fitProcess, initargs=[fitQueue, self])
        pool.map_async(self._fitProcess, iterator, chunksize=16)

        def _processPoolWorkerResults():
            nJobsDone = 0

            if verbose > 0:
                bar = progressbar.ProgressBar(max_value=nJobs, redirect_stdout=True, poll_interval=0.0001)
                bar.update(0)

            while nJobsDone < nJobs:
                data = fitQueue.get()

                # result of fitting
                indices, x, WOut = data
                id = self._uniqueIDFromIndices(indices)

                if WOut is None:
                    import sys
                    print("WARNING: Fit process for pixel {0} did not succeed".format(indices), file=sys.stderr)

                # store WOut
                if self._averageOutputWeights:
                    if WOut is not None:
                        self._WOut += WOut / np.prod(self.inputShape)
                else:
                    self._WOuts[id] = WOut

                    # store x
                self._xs[id] = x

                nJobsDone += 1
                if verbose > 0:
                    bar.update(nJobsDone)
                    if verbose > 1:
                        print(nJobsDone)

            if verbose > 0:
                bar.finish()

        _processPoolWorkerResults()

        pool.close()
Example #27
    def __init__(self, inputShape, n_reservoir,
                 filterSize=1, stride=1, borderMode="mirror", nWorkers="auto",
                 spectralRadius=1.0, noiseLevel=0.0, inputScaling=None,
                 leakingRate=1.0, reservoirDensity=0.2, randomSeed=None, averageOutputWeights=True,
                 out_activation=lambda x: x, out_inverse_activation=lambda x: x,
                 weightGeneration='naive', bias=1.0, outputBias=1.0,
                 outputInputScaling=1.0, inputDensity=1.0, solver='pinv', regressionParameters={}, activation=B.tanh,
                 activationDerivation=lambda x: 1.0 / B.cosh(x) ** 2):

        self._averageOutputWeights = averageOutputWeights
        if averageOutputWeights and solver != "lsqr":
            raise ValueError(
                "`averageOutputWeights` can only be set to `True` when `solver` is set to `lsqr` (Ridge Regression)")

        self._borderMode = borderMode
        if not borderMode in ["mirror", "padding", "edge", "wrap"]:
            raise ValueError(
                "`borderMode` must be set to one of the following values: `mirror`, `padding`, `edge` or `wrap`.")

        self._regressionParameters = regressionParameters
        self._solver = solver

        n_inputDimensions = len(inputShape)

        if filterSize % 2 == 0:
            raise ValueError("filterSize has to be an odd number (1, 3, 5, ...).")
        self._filterSize = filterSize
        self._filterWidth = int(np.floor(filterSize / 2))
        self._stride = stride

        self._n_input = int(np.power(np.ceil(filterSize / stride), n_inputDimensions))

        self.n_inputDimensions = n_inputDimensions
        self.inputShape = inputShape

        if not self._averageOutputWeights:
            self._WOuts = B.empty((np.prod(inputShape), 1, self._n_input + n_reservoir + 1))
            self._WOut = None
        else:
            self._WOuts = None
            self._WOut = B.zeros((1, self._n_input + n_reservoir + 1))
        self._xs = B.empty((np.prod(inputShape), n_reservoir, 1))

        if nWorkers == "auto":
            self._nWorkers = np.max((cpu_count() - 1, 1))
        else:
            self._nWorkers = nWorkers

        manager = Manager()
        self.sharedNamespace = manager.Namespace()
        if not hasattr(self, "fitWorkerID") or self.parallelWorkerIDs is None:
            self.parallelWorkerIDs = manager.Queue()
            for i in range(self._nWorkers):
                self.parallelWorkerIDs.put((i))

        super(SpatioTemporalESN, self).__init__(n_input=self._n_input, n_reservoir=n_reservoir, n_output=1,
                                                spectralRadius=spectralRadius,
                                                noiseLevel=noiseLevel, inputScaling=inputScaling,
                                                leakingRate=leakingRate, reservoirDensity=reservoirDensity,
                                                randomSeed=randomSeed, out_activation=out_activation,
                                                out_inverse_activation=out_inverse_activation,
                                                weightGeneration=weightGeneration, bias=bias, outputBias=outputBias,
                                                outputInputScaling=outputInputScaling,
                                                inputDensity=inputDensity, activation=activation,
                                                activationDerivation=activationDerivation)

        """
Example #28
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, html, screenshots, graph, nuke, whoxy_limit):
    """
The OSINT toolkit:\n
This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.

ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.

View the README for the full details and lists of API keys!

Note: If providing a scope file, acceptable IP addresses/ranges include:

    * Single Address:      8.8.8.8

    * Basic CIDR:          8.8.8.0/24

    * Nmap-friendly Range: 8.8.8.8-10

    * Underscores? OK:     8.8.8.8_8.8.8.10
    """
    click.clear()
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    verbose = None

    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(report_path, output_report)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)

        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain_list, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(organization, scope, ip_list,
                                      domain_list, whoxy_limit))
        jobs.append(domain_report)

        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)

        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)

        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)

        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path,
                                         verbose))
            more_jobs.append(files_report)

        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()

        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()

        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()

        report.close_out_reporting()
        print(
            green(
                "[+] Job's done! Your results are in {} and can be viewed and queried with \
any SQLite browser.".format(output_report)))

        if graph:
            graph_reporter = grapher.Grapher(output_report)
            print(
                green(
                    "[+] Loading ODIN database file {} for conversion to Neo4j"
                ).format(output_report))

            if nuke:
                confirm = input(
                    red("\n[!] You set the --nuke option. This wipes out all nodes \
for a fresh start. Proceed? (Y\\N) "))
                if confirm.lower() == "y":
                    graph_reporter.clear_neo4j_database()
                    print(green("[+] Database successfully wiped!\n"))
                    graph_reporter.convert()
                else:
                    print(
                        red("[!] Then you can convert your database to a graph database later. \
Run lib/grapher.py with the appropriate options."))
            else:
                graph_reporter.convert()

        if html:
            print(
                green("\n[+] Creating the HTML report using {}.".format(
                    output_report)))
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
Example #29
def osint(self, organization, domain, files, ext, delete, scope_file, aws,
          aws_fixes, verbose, html, screenshots):
    """
The OSINT toolkit:\n
This is ODIN's primary module. ODIN will take the target organization, domain, and other data
provided and hunt for information. On the human side, ODIN looks for employee names,
email addresses, and social media profiles. Names and emails are cross-referenced with
HaveIBeenPwned, Twitter's API, and search engines to collect additional information.\n
ODIN also uses various tools and APIs to collect information on the provided IP addresses
and domain names, including things like DNS and IP address history.\n
View the README for the full details and lists of API keys!
    """
    asciis.print_art()
    print(green("[+] OSINT Module Selected: ODIN will run all recon modules."))

    if verbose:
        print(
            yellow(
                "[*] Verbose output Enabled -- Enumeration of RDAP contact information \
is enabled, so you may get a lot of it if scope includes a large cloud provider."
            ))

    # Perform prep work for reporting
    setup_reports(organization)
    report_path = "reports/{}/".format(organization)
    output_report = report_path + "OSINT_DB.db"

    if __name__ == "__main__":
        # Create manager server to handle variables shared between jobs
        manager = Manager()
        ip_list = manager.list()
        domain_list = manager.list()
        # Create reporter object and generate final list, the scope from scope file
        report = reporter.Reporter(output_report)
        report.create_tables()
        scope, ip_list, domain_list = report.prepare_scope(
            ip_list, domain_list, scope_file, domain)

        # Create some jobs and put Python to work!
        # Job queue 1 is for the initial phase
        jobs = []
        # Job queue 2 is used for jobs using data from job queue 1
        more_jobs = []
        # Job queue 3 is used for jobs that take a while and use the progress bar, i.e. AWS enum
        even_more_jobs = []
        company_info = Process(name="Company Info Collector",
                               target=report.create_company_info_table,
                               args=(domain, ))
        jobs.append(company_info)
        employee_report = Process(name="Employee Hunter",
                                  target=report.create_people_table,
                                  args=(domain, organization))
        jobs.append(employee_report)
        domain_report = Process(name="Domain and IP Address Recon",
                                target=report.create_domain_report_table,
                                args=(scope, ip_list, domain_list, verbose))
        jobs.append(domain_report)

        shodan_report = Process(name="Shodan Queries",
                                target=report.create_shodan_table,
                                args=(ip_list, domain_list))
        more_jobs.append(shodan_report)
        urlcrazy_report = Process(name="Domain Squatting Recon",
                                  target=report.create_urlcrazy_table,
                                  args=(organization, domain))
        more_jobs.append(urlcrazy_report)

        cloud_report = Process(name="Cloud Recon",
                               target=report.create_cloud_table,
                               args=(organization, domain, aws, aws_fixes))
        even_more_jobs.append(cloud_report)

        if screenshots:
            take_screenshots = Process(name="Screenshot Snapper",
                                       target=report.capture_web_snapshots,
                                       args=(report_path, ))
            more_jobs.append(take_screenshots)

        if files:
            files_report = Process(name="File Hunter",
                                   target=report.create_foca_table,
                                   args=(domain, ext, delete, report_path,
                                         verbose))
            jobs.append(files_report)

        print(
            green(
                "[+] Beginning initial discovery phase! This could take some time..."
            ))
        for job in jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in jobs:
            job.join()

        print(
            green(
                "[+] Initial discovery is complete! Proceeding with additional queries..."
            ))
        for job in more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in more_jobs:
            job.join()

        print(green("[+] Final phase: checking the cloud and web services..."))
        for job in even_more_jobs:
            print(green("[+] Starting new process: {}".format(job.name)))
            job.start()
        for job in even_more_jobs:
            job.join()

        report.close_out_reporting()
        print(
            green("[+] Job's done! Your results are in {}.".format(
                output_report)))

        if html:
            html_reporter = htmlreporter.HTMLReporter(
                organization, report_path + "/html_report/", output_report)
            html_reporter.generate_full_report()
Example #30
    def _multi_channel_apply_disk_parallel(self, function, cleanup_function,
                                           output_path, from_time, to_time,
                                           channels, cast_dtype,
                                           pass_batch_info, pass_batch_results,
                                           processes, **kwargs):

        self.logger.debug('Starting parallel operation...')

        if pass_batch_results:
            raise NotImplementedError("pass_batch_results is not "
                                      "implemented on 'disk' mode")

        # need to convert to a list, otherwise it cannot be pickled
        data = list(
            self.multi_channel(from_time, to_time, channels,
                               return_data=False))
        n_batches = self.indexer.n_batches(from_time, to_time, channels)

        self.logger.info('Data will be split into %s batches', n_batches)

        output_path = Path(output_path)

        # create local variables to avoid pickling problems
        _path_to_recordings = copy(self.path_to_recordings)
        _dtype = copy(self.dtype)
        _n_channels = copy(self.n_channels)
        _data_order = copy(self.data_order)
        _loader = copy(self.loader)
        _buffer_size = copy(self.buffer_size)

        reader = partial(RecordingsReader,
                         path_to_recordings=_path_to_recordings,
                         dtype=_dtype,
                         n_channels=_n_channels,
                         data_order=_data_order,
                         loader=_loader,
                         return_data_index=True)

        m = Manager()
        mapping = m.dict()
        next_to_write = m.Value('i', 0)

        def parallel_runner(element):
            i, _ = element

            res = util.batch_runner(element,
                                    function,
                                    reader,
                                    pass_batch_info,
                                    cast_dtype,
                                    kwargs,
                                    cleanup_function,
                                    _buffer_size,
                                    save_chunks=False,
                                    output_path=output_path)

            if i == 0:
                mapping['dtype'] = str(res.dtype)

            while True:
                if next_to_write.value == i:
                    with open(str(output_path), 'wb' if i == 0 else 'ab') as f:
                        res.tofile(f)

                    next_to_write.value += 1
                    break

        # run jobs
        self.logger.debug('Creating processes pool...')

        p = Pool(processes)
        res = p.map_async(parallel_runner, enumerate(data))

        finished = 0

        if self.show_progress_bar:
            pbar = tqdm(total=n_batches)

        if self.show_progress_bar:

            while True:
                if next_to_write.value > finished:
                    update = next_to_write.value - finished
                    pbar.update(update)
                    finished = next_to_write.value

                if next_to_write.value == n_batches:
                    break

            pbar.close()
        else:
            res.get()

        # save metadata
        params = util.make_metadata(channels, self.n_channels,
                                    mapping['dtype'], output_path)

        return output_path, params
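The next_to_write value above is what keeps the batches appended to the output file in order even though the pool may finish them out of order: each worker spins until the shared counter reaches its own index, appends, then increments the counter. A condensed, runnable version of that ordering trick (the squaring stands in for the real per-batch work):

import time
from functools import partial
from multiprocessing import Manager, Pool


def write_in_order(mapping, next_to_write, output_path, element):
    i, value = element
    result = str(value * value)  # placeholder for the real batch computation
    if i == 0:
        mapping["dtype"] = "str"
    while True:  # wait for our turn to append
        if next_to_write.value == i:
            with open(output_path, "w" if i == 0 else "a") as f:
                f.write(result + "\n")
            next_to_write.value += 1
            break
        time.sleep(0.01)


if __name__ == "__main__":
    m = Manager()
    mapping = m.dict()
    next_to_write = m.Value("i", 0)
    runner = partial(write_in_order, mapping, next_to_write, "batches.txt")
    with Pool(3) as pool:
        pool.map(runner, enumerate([3, 1, 2]))
    print(open("batches.txt").read().split(), mapping["dtype"])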