Example #1
import joblib
import numpy as np
from pathos.multiprocessing import ProcessingPool as Pool

# VOCAB_PATH, TEXT_PATH, COOC_MAT_PATH, bz2_line_gen() and the mp() worker
# are defined elsewhere in the module.


def make_cooc_mat(num_procs=4):
    cv = joblib.load(VOCAB_PATH)
    if num_procs > 1:
        pool = Pool(num_procs)
    analyzer = cv.build_analyzer()
    vocab = cv.vocabulary_
    dim = len(cv.vocabulary_)
    shared_params = dim, analyzer, vocab
    print('vocab size:', dim)
    cooc_mat = np.zeros((dim, dim), dtype=np.uint)
    sub_lines = []
    sub_blocks = []
    # Stream the corpus, batching it into blocks of 10,000 lines.
    for line in bz2_line_gen(TEXT_PATH):
        sub_lines.append(line)
        if len(sub_lines) >= 10000:
            if num_procs == 1:
                cooc_mat += mp((0, shared_params, sub_lines))
            else:
                sub_blocks.append((len(sub_blocks), shared_params, sub_lines))
                if len(sub_blocks) == num_procs:
                    # Fan the blocks out to the pool and sum the partial
                    # matrices as they arrive (uimap yields in completion order).
                    for cooc in pool.uimap(mp, sub_blocks):
                        cooc_mat += cooc
                    sub_blocks = []
                    print('block')
            sub_lines = []
    # Flush any leftover lines and any blocks that never filled a full round.
    if sub_lines:
        cooc_mat += mp((0, shared_params, sub_lines))
    for block in sub_blocks:
        cooc_mat += mp(block)
    print(np.sum(cooc_mat))
    joblib.dump(cooc_mat, COOC_MAT_PATH)
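
The core pattern of Example #1 (batch a stream into blocks, fan the blocks out with uimap, and sum the partial results as they complete) can be distilled into a minimal, self-contained sketch; the worker function, block contents and sizes below are illustrative assumptions, not part of the original:

import numpy as np
from pathos.multiprocessing import ProcessingPool as Pool


def worker(block):
    # Illustrative stand-in for mp(): reduce one block to a partial array.
    return np.array([x * x for x in block], dtype=np.uint64)


if __name__ == '__main__':
    pool = Pool(4)
    blocks = [list(range(i, i + 10)) for i in range(0, 40, 10)]
    total = np.zeros(10, dtype=np.uint64)
    for partial in pool.uimap(worker, blocks):  # partials arrive as they finish
        total += partial
    print(total)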
Example #2
import signal

import tqdm
from pathos.multiprocessing import ProcessingPool

# _state, _cores, _print_warning, _print_runs, _print_section, _is_selected,
# _run_dry, _run_run and _max_name_len are module-level helpers defined
# elsewhere in the same file.


def run():
    """Run the previously declared experiments.

    You should call this exactly once at the end of the file.

    If ``dry_run`` is given as a command-line parameter, the runs are not
    executed; the commands are printed to ``stdout`` instead.

    """
    if _state.run_was_called:
        _print_warning("run() was called more than once")
    _state.run_was_called = True

    _print_runs()

    if _is_selected("dry_run"):
        _run_dry()
        return

    _print_section("\nrunning the experiments:")

    for name, runs in _state.runs_by_name.items():
        if len(runs) == 0:
            continue
        # run in parallel
        # Ignore SIGINT while the pool is created so the workers inherit the
        # "ignore" disposition; restore the handler right after so Ctrl-C
        # still reaches the parent.
        orig_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
        pool = ProcessingPool(nodes=_cores)
        signal.signal(signal.SIGINT, orig_sigint_handler)
        try:
            for _ in tqdm.tqdm(
                    pool.uimap(_run_run, runs),
                    desc=name.ljust(_max_name_len()),
                    total=len(runs),
            ):
                pass
        except KeyboardInterrupt:
            pool.terminate()
            _print_warning("aborted during experiment " + name)
Example #3
    # (The snippet starts mid-function: the comprehension below keeps only the
    # files that still need processing. Assigning the result back to
    # ``filenames`` is an assumption based on its use further down; the
    # helper functions and constants are defined elsewhere.)
    filenames = [
        x
        for x in clean_filenames(filenames, json.load(open(AU_EMOTE_DICT_LOC)))
        if (clean_base(x) not in already_done_dirs and (
            x not in already_done_dict or already_done_dict[x] > 0))
    ]
    out_q = m.Queue()  # ``m`` is assumed to be a multiprocessing.Manager()

    Thread(target=listener, args=(ALREADY_DONE_FILE, out_q)).start()

    # Serial fallback:
    # for filename in tqdm(filenames):
    #     find_filename_data(AU_EMOTE_DICT_LOC, CLASSIFIER_LOC,
    #                        REAL_TIME_FILE_LOC, OUT_FILE_PATH, out_q, filename)

    f = functools.partial(find_filename_data, AU_EMOTE_DICT_LOC,
                          CLASSIFIER_LOC, REAL_TIME_FILE_LOC, OUT_FILE_PATH,
                          out_q)
    num_processes = 5
    p = Pool(num_processes)
    with tqdm(total=len(filenames)) as pbar:
        for _ in p.uimap(f, enumerate(filenames)):
            pbar.update()

    out_q.put('kill')  # sentinel: tells the listener thread to stop
    p.close()
    p.join()
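
The out_q / listener combination in Example #3 is a common way to serialize writes from many workers: a single thread drains a shared Manager queue into one file and stops on a 'kill' sentinel. A minimal sketch of that pattern, under the assumption that listener works roughly like this (the file name and messages are illustrative):

from multiprocessing import Manager
from threading import Thread


def listener(path, q):
    # Drain the queue into one file until the sentinel arrives.
    with open(path, 'a') as fh:
        for msg in iter(q.get, 'kill'):
            fh.write(str(msg) + '\n')
            fh.flush()


if __name__ == '__main__':
    m = Manager()
    out_q = m.Queue()
    t = Thread(target=listener, args=('already_done.txt', out_q))
    t.start()
    for i in range(3):
        out_q.put(i)   # workers would put results here
    out_q.put('kill')  # sentinel: stop the listener
    t.join()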
Example #4
import os
import shutil
import tempfile

import bcolz
import tqdm
from pathos.multiprocessing import ProcessingPool

# get_mode, get_root_preselection, evt_num, ipchi2, pt, m, dtf_m, ltime,
# helpers, variables, config, treesplitter, remove_buffer_for_mode and log
# come from the surrounding analysis package.


def download(modename, polarity, year, full, test=False, mc=None, njobs=1):
    import root_pandas
    log.info('Getting data for {} {} {}'.format(
        modename, polarity, year))

    mode = get_mode(polarity, year, modename, mc)
    # The 'p' in 'Dstp' was accidentally left out, so everything has to be
    # renamed for this one exception. Hack incoming.
    if modename == 'WS' and year == 2016:
        # As this is the start, hack name of the particle in the mode.
        mode.Dstp.name = 'Dst'

    sel = get_root_preselection.get(mode)

    # Always download the entire MC
    if full != 1 and mc is None:
        ctr = int(1./float(full))
        sel = '({} % {} == 0) && '.format(evt_num(), ctr) + sel
        log.info('Using ({} % {} == 0)'.format(evt_num(), ctr))


    input_files = mode.get_file_list()
    if test:
        input_files = input_files[:4]
    # Split the input into chunks of 25 files each.
    chunked = list(helpers.chunks(input_files, 25))
    length = len(chunked)

    # While the code is in development, just grab every variable we can
    # access.
    for part in mode.head.all_mothers() + mode.head.all_daughters():
        for func in variables.__all__:
            try:
                getattr(variables, func)(part)
            except variables.AccessorUsage:
                pass

    # Make some sorted variables; saves hassle when training BDTs later.
    arg_sorted_ip = '{},{},{},{}'.format(
        *[ipchi2(p) for p in mode.D0.all_daughters()])
    arg_sorted_pt = '{},{},{},{}'.format(
        *[pt(p) for p in mode.D0.all_daughters()])

    add_vars = {
        'delta_m': '{} - {}'.format(m(mode.Dstp), m(mode.D0)),
        'delta_m_dtf': '{} - {}'.format(dtf_m(mode.Dstp), dtf_m(mode.D0)),
        'ltime_ratio': '{} / {}'.format(ltime(mode.D0), config.Dz_ltime),
        'ipchi2_1': 'ROOTex::Leading({})'.format(arg_sorted_ip),
        'ipchi2_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_ip),
        'ipchi2_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_ip),
        'ipchi2_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_ip),
        'pt_1': 'ROOTex::Leading({})'.format(arg_sorted_pt),
        'pt_2': 'ROOTex::SecondLeading({})'.format(arg_sorted_pt),
        'pt_3': 'ROOTex::ThirdLeading({})'.format(arg_sorted_pt),
        'pt_4': 'ROOTex::FourthLeading({})'.format(arg_sorted_pt),
    }
    variables_needed = list(variables.all_ever_used)

    if mc == 'mc':
        variables_needed.append('Dstp_BKGCAT')

    def run_splitter(fns):
        temp_file = tempfile.mktemp('.root')
        treesplitter(files=fns, treename=mode.get_tree_name(), output=temp_file,
                     variables=variables_needed, selection=sel,
                     addvariables=add_vars)
        return temp_file

    pool = ProcessingPool(njobs)
    temp_files = []
    for r in tqdm.tqdm(pool.uimap(run_splitter, chunked),
                       leave=True, total=length, smoothing=0):
        temp_files.append(r)

    log.info('Created {} temporary files.'.format(len(temp_files)))
    bcolz_folder = config.bcolz_locations.format(mode.get_store_name())

    try:
        log.info('Removing already existing data at {}'.format(
            bcolz_folder))
        shutil.rmtree(bcolz_folder)
    except OSError:
        log.info('No previous data found. Nothing to delete.')

    df_gen = root_pandas.read_root(temp_files, mode.get_tree_name(),
                                   chunksize=100 if test else 500000)

    # New storage using bcolz because it performs better
    ctuple = None

    for df in df_gen:
        log.info('Adding {} events of {} to store {}.'.format(
            len(df), mode.get_tree_name(), bcolz_folder))
        if modename == 'WS' and year == 2016:
            new_names = {
                old: old.replace('Dst', 'Dstp')
                for old in df.columns if 'Dst' in old
            }
            df = df.rename(index=str, columns=new_names)
        if ctuple is None:
            ctuple = bcolz.ctable.fromdataframe(df, rootdir=bcolz_folder)
        else:
            ctuple.append(df.to_records(index=False))

    for f in temp_files:
        os.remove(f)
    # Loop and delete everything in the datastore that needs to be recached
    remove_buffer_for_mode(mode.mode)
    if modename == 'WS' and year == 2016:
        # As this is the start, hack name of the particle in the mode.
        mode.Dstp.name = 'Dstp'
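
The shape of Example #4 (split the input into chunks, let each worker write its own temp file, then merge the temp files serially) reduces to a small generic sketch; the plain-text writing and merging below are an illustrative stand-in for treesplitter and root_pandas:

import os
import tempfile
from pathos.multiprocessing import ProcessingPool as Pool


def process_chunk(chunk):
    # Each worker writes its partial result to its own temp file.
    fd, path = tempfile.mkstemp(suffix='.txt')
    with os.fdopen(fd, 'w') as fh:
        for item in chunk:
            fh.write(str(item * item) + '\n')
    return path


if __name__ == '__main__':
    chunks = [list(range(i, i + 5)) for i in range(0, 20, 5)]
    pool = Pool(4)
    temp_files = list(pool.uimap(process_chunk, chunks))
    # Merge serially, then clean up.
    merged = []
    for path in temp_files:
        with open(path) as fh:
            merged.extend(fh.read().split())
        os.remove(path)
    print(len(merged), 'rows merged')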
Example #5
# from multiprocessing import Pool  # stdlib Pool has no uimap
from pathos.multiprocessing import ProcessingPool as Pool


def f(x):
    return x * x


if __name__ == '__main__':
    p = Pool()
    for i in p.uimap(f, range(10)):
        print(i)  # results arrive in completion order, not input order
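
For contrast with Example #5: uimap is pathos' unordered iterator map (the counterpart of multiprocessing's imap_unordered), so results are yielded in completion order, while imap preserves input order. A quick sketch:

from pathos.multiprocessing import ProcessingPool as Pool


def f(x):
    return x * x


if __name__ == '__main__':
    p = Pool()
    print(list(p.imap(f, range(10))))     # ordered: [0, 1, 4, ..., 81]
    print(sorted(p.uimap(f, range(10))))  # unordered; sort to compare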
Example #6
import argparse
import os

import numpy as np
from tqdm import tqdm
from pathos.multiprocessing import ProcessingPool as Pool

# calculateestimator(), getsamples() and getestimate() are defined elsewhere.


def main():
    """Hardcoded parameters for Ramstake."""
    rs_codeword_length = 255 * 8
    modulus_bitsize = 756839
    q = 2**modulus_bitsize - 1

    # Parse arguments
    parser = argparse.ArgumentParser(description="Secret Estimator. Compute either estimator parameters or estimate secrets based on a set of failures (requires precomputed parameters).")
    parser.add_argument("-p", "--params", nargs=2, dest="params", metavar=("samplesfail", "samplessuccess"), help="Compute estimator parameters using random decryption failures (filename) and successes (filename).")
    parser.add_argument("-s", "--secrets", nargs=1, dest="secrets", metavar=("estimator"), required=True, help="Compute estimates of the secrets using a set of decryption failures (filename).")
    args = parser.parse_args()

    print "---------------------------------"
    print " modulus_bitsize: " + str(modulus_bitsize)
    print "---------------------------------"

    if args.params:
        print(" Calculating estimations from samples...")
        est = calculateestimator(args.params[0], args.params[1], modulus_bitsize, q, rs_codeword_length)
        np.save('estimated_params', est)

    """
        If --params not given as argument, file 'estimates_params.npy' should exist is same directory 
    """
    if not os.path.exists('estimated_params.npy'):
        print('Parameters from sampling not yet estimates. Please specify -p sample_fail samples_success')
        return 0

    est = np.load('estimated_params.npy')

    """
        Estimate secrets.
    """
    if args.secrets:
        # Estimate both secrets: aorb == 1 selects b, aorb == 0 selects a.
        for aorb in [1, 0]:
            samples = getsamples(args.secrets[0], modulus_bitsize)

            # The first sample carries the secrets: save them, then drop it.
            secret = next(samples)
            np.save('secret-a', secret[1])
            np.save('secret-b', secret[2])
            del secret

            # Start from a flat probability estimate of the zeros and ones.
            prob = np.ones(modulus_bitsize)

            # Loop over all the samples and accumulate the estimated updates.
            pool = Pool(nodes=4)

            f = lambda x: getestimate(x[aorb], est, modulus_bitsize, q,
                                      rs_codeword_length)

            print('start')
            for i in tqdm(pool.uimap(f, samples)):
                prob = prob * i
                np.save('estimate-tmp', prob)

            tmp = 'b' if aorb == 1 else 'a'
            np.save('estimate-' + tmp, prob)
Example #7
from tqdm import tqdm
from pathos.multiprocessing import ProcessingPool as Pool


def do_parallel(function, items, ncores=28):
    # Drain uimap's unordered iterator just to drive the progress bar.
    p = Pool(ncores)
    for _ in tqdm(p.uimap(function, items), total=len(items)):
        pass
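
A hypothetical usage sketch for the helper above; the square worker and the core count are illustrative, not from the source:

def square(x):
    return x * x


if __name__ == '__main__':
    do_parallel(square, list(range(100)), ncores=4)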