Code Example #1
def main():
    base_filename = "../plots/survival/{}.pdf"

    survival_functions = [(sv.FractionOldNew, 'FractionNew'),
                          (sv.OldNewSurvival, 'OldNewMix'),
                          (sv.OldWaning, 'OldWaning')]

    p = Pool()
    p.map(run_survival_function, survival_functions)
Code Example #2
File: task51.py Project: fresh1987/multiprocessing
 def createZips(self):
     t1 = time()
     if __name__ == '__main__':
         self.get_list_of_id()  # get set of string id
         p = Pool()
         p.map(self.createZip, range(self.count_zips))
         p.close()
         p.join()
     print('Create .zip files time = ' + str(time() - t1) + 's')
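The if __name__ == '__main__': guard used above matters because, on start methods that spawn fresh interpreters (e.g. Windows), each worker re-imports the calling module; without the guard, pool creation would recurse. A minimal, hedged sketch of the usual module-level pattern (the square worker and the input range are illustrative, not from the original project):

from multiprocessing import Pool

def square(x):
    # worker must be defined at module level so child processes can import it
    return x * x

if __name__ == '__main__':
    # the guard keeps spawned children from re-running the pool setup on import
    with Pool() as p:
        print(p.map(square, range(10)))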
Code Example #3
def fmultiprocess(log, function, inputArray, poolSize=False, **kwargs):
    """multiprocess pool

    **Key Arguments:**
        - ``log`` -- logger
        - ``function`` -- the function to multiprocess
        - ``inputArray`` -- the array to be iterated over

    **Return:**
        - ``resultArray`` -- the array of results

    **Usage:**

        .. code-block:: python 

            from fundamentals import multiprocess
            # DEFINE AN INPUT ARRAY
            inputArray = range(10000)
            results = multiprocess(log=log, function=functionName,
                                  inputArray=inputArray, otherFunctionKeyword="cheese")
    """
    log.info('starting the ``multiprocess`` function')

    # DEFINE POOL SIZE - NUMBER OF CPU CORES TO USE (BEST = ALL - 1)
    # if cpu_count() > 1:
    #     poolSize = cpu_count() - 1
    # else:
    #     poolSize = 1

    # if len(inputArray) < poolSize:
    #     poolSize = len(inputArray)
    if poolSize:
        p = Pool(processes=poolSize)
    else:
        p = Pool()

    # MAP-REDUCE THE WORK OVER MULTIPLE CPU CORES
    try:
        mapfunc = partial(function, log=log, **kwargs)
        resultArray = p.map(mapfunc, inputArray)
    except:
        try:
            mapfunc = partial(function, **kwargs)
            resultArray = p.map(mapfunc, inputArray)
        except:
            mapfunc = partial(function, log=log, **kwargs)
            resultArray = p.map(mapfunc, inputArray)

    p.close()
    p.terminate()
    p.join()

    log.info('completed the ``multiprocess`` function')
    return resultArray
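A hedged usage sketch for the function above, assuming the fundamentals package is installed and exposes multiprocess as shown in the docstring; the triple worker and pool size are illustrative. Because fmultiprocess first tries to bind log to the worker via functools.partial, the worker here accepts a log keyword:

import logging
from fundamentals import multiprocess

def triple(x, log=None):
    # accepts the log keyword that fmultiprocess injects on its first attempt
    return 3 * x

if __name__ == '__main__':
    log = logging.getLogger(__name__)
    results = multiprocess(log=log, function=triple,
                           inputArray=list(range(8)), poolSize=2)
    print(results)  # [0, 3, 6, 9, 12, 15, 18, 21]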
Code Example #4
 def extract_patterns_matching_async(self):
     startTime = time.time()
     print "running on {} processors".format(WORKERS)
     pool = Pool(processes=WORKERS,
                 initargs=(sent_locker, lock, sentence_counter))
     pool.map(self.extract_patterns_from_file,
              self.data_wrapper.ngrams_files)
     pool.close()
     pool.join()
     total_time = time.time() - startTime
     print "extract_patterns_matching_async running time: {}".format(
         total_time)
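A note on the call above: in the standard-library multiprocessing.Pool, the initargs tuple is only passed to an initializer callable, which this snippet does not supply. A minimal hedged sketch of the usual initializer pattern for sharing a lock across workers (init_worker, process_item, and the inputs are illustrative assumptions):

from multiprocessing import Pool, Lock

lock = None

def init_worker(shared_lock):
    # runs once in each worker process; stashes the shared lock in a module global
    global lock
    lock = shared_lock

def process_item(item):
    with lock:
        print('processing', item)

if __name__ == '__main__':
    shared_lock = Lock()
    with Pool(processes=4, initializer=init_worker, initargs=(shared_lock,)) as pool:
        pool.map(process_item, range(8))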
Code Example #5
    def convertpool(self):

        if len(self.todo) > 0:

            if self.type in [".h264", ".mp4", ".avi"]:

                pool = Pool(min(self.pools, len(self.todo)))
                try:
                    pool.map(self.conv_single, self.todo)
                    pool.close()
                    lineprint("Done converting all videofiles!")
                except KeyboardInterrupt:
                    lineprint("User terminated converting pool..")
                    pool.terminate()
                except Exception as e:
                    excep = "Got exception: %r, terminating pool" % (e, )
                    lineprint(excep)
                    pool.terminate()
                finally:
                    pool.join()

                if self.delete:
                    for filein in self.todo:
                        os.remove(filein)
                    lineprint("Deleted all original videofiles..")

            elif self.type in [".jpg", ".jpeg", ".png"]:

                vidname = commonpref(self.todo)
                lineprint("Start converting " + str(len(self.todo)) +
                          " images")

                frame_array = []
                for filename in self.todo:
                    frame = cv2.imread(filename)
                    frame_array.append(frame)
                    #os.rename(filename, self.outdir+"/"+filename)
                h, w, _ = frame_array[0].shape
                if self.outdir != "":
                    vidname = self.outdir + "/" + os.path.basename(vidname)
                vidout = videowriter(vidname, w, h, self.imgfps,
                                     self.resizeval)
                for i in range(len(frame_array)):
                    vidout.write(frame_array[i])
                vidout.release()
                lineprint("Finished converting " + os.path.basename(vidname))

            else:
                lineprint("No video or image files found..")
Code Example #6
def main():
    workers = 5
    while True:
        try:
            # Worklist contains data to be distributed
            worklist = []
            # Launch workers
            process = Pool(workers)
            # Map data to worker_main function
            process.map(worker_main, worklist)
            # Block until all work completed
            process.close()
            process.join()
        except Exception as ex:
            print(str(ex))
Code Example #7
File: probability.py Project: choandrew/poker
def calculate_prob(hole_cards, num_iterations, given_board):
    import itertools
    
    # these libraries must be pip-installed
    from multiprocess import Pool
    import dill as pickle

    # creates a pool of 4 worker processes
    p = Pool(4)

    deck_cards = prob_functions.generate_deck(hole_cards)
    possible_card_pairings = tuple(itertools.combinations(deck_cards, 2))
    card_combos = map( lambda x: tuple (list(hole_cards) + [x]), possible_card_pairings)

    s = pickle.dumps(lambda hc: single_prob(hc, num_iterations, given_board))
    f = pickle.loads(s)

    prob_list = p.map( f , card_combos)

    tie = 0
    win = 0
    for prob in prob_list:
        tie += prob[0] 
        win += prob[1]
    l = len(prob_list)
    tie = tie / l
    win = win / l

    return (tie,win)
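The example above imports Pool from multiprocess (the dill-backed fork of multiprocessing) and round-trips the lambda through dill, because the standard pickler cannot serialize lambdas for Pool.map. A hedged, self-contained sketch of the same idea (the toy payload is an illustrative assumption):

# pip install multiprocess dill
from multiprocess import Pool
import dill as pickle

def parallel_offset(values, offset):
    # dumping/loading the lambda with dill mirrors the snippet above;
    # multiprocess already serializes with dill, so the explicit round-trip is belt and braces
    f = pickle.loads(pickle.dumps(lambda v: v + offset))
    with Pool(4) as p:
        return p.map(f, values)

if __name__ == '__main__':
    print(parallel_offset(range(5), offset=10))  # [10, 11, 12, 13, 14]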
Code Example #8
def compute_jaccard_list_vs_all(seed_indices):
    pool = Pool(14)
    affinities_to_seeds = np.array(
        pool.map(lambda i: compute_jaccard_i_vs_all(i), seed_indices))
    pool.close()
    pool.join()
    return affinities_to_seeds
Code Example #9
def compute_jaccard_pairwise(indices,
                             square_form=True,
                             parallel=True,
                             return_poses=False):
    n = len(indices)

    if parallel:
        pool = Pool(16)
        scores_poses_tuples = pool.map(
            lambda x: compute_jaccard_i_vs_list(x[0], x[1]),
            [(indices[i], indices[i + 1:]) for i in range(n)])
        pool.close()
        pool.join()
    else:
        scores_poses_tuples = [
            compute_jaccard_i_vs_list(indices[i], indices[i + 1:])
            for i in range(n)
        ]

    pairwise_scores = np.array(
        [scores for scores, poses in scores_poses_tuples])

    if square_form:
        pairwise_scores = squareform(np.concatenate(pairwise_scores))

    if return_poses:
        poses = np.array([poses for scores, poses in scores_poses_tuples])
        return pairwise_scores, poses
    else:
        return pairwise_scores
Code Example #10
    def create_csv(self):
        t1 = time()
        file1 = open(self.out_csv1, "w")
        file1.write("id" + ',' + "level" + '\n')
        file2 = open(self.out_csv2, "w")
        file2.write("id" + ',' + "object_name" + '\n')
        file1.close()
        file2.close()

        if __name__ == '__main__':
            i = range(len(self.list_of_zips))
            p = Pool()
            p.map(self.parse_Zip, i)
            p.close()
            p.join()
        print('Create .csv files time = ' + str(time() - t1) + 's')
Code Example #11
    def main(self):

        urls = self.generateListOfUrls(self.WEBSITE)

        pool = Pool(5)
        p_map = pool.map(self.processPage, urls)
        self.saveLinks(p_map)
Code Example #12
def load_scoremaps_multiple_sections_parallel(sections, stack, structure, downscale, detector_id):
    pool = Pool(12)
    scoremaps = pool.map(lambda sec: load_scoremap_worker(stack, sec, structure, downscale, detector_id=detector_id),
                                     sections)
    pool.close()
    pool.join()
    return {sec: sm for sec, sm in zip(sections, scoremaps) if sm is not None}
Code Example #14
File: preprocessing.py Project: yoskitar/Authorship
def parallel_apply(df, func, n_cores, n_jobs):
    df_split = np.array_split(df, n_jobs)
    pool = Pool(n_cores)
    df = pd.concat(pool.map(func, df_split))
    pool.close()
    pool.join()
    return (df)
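A hedged, self-contained sketch of the same split-apply-concat pattern that parallel_apply implements, mapping a per-chunk worker over DataFrame slices (the add_lengths worker and the sample frame are illustrative assumptions):

import numpy as np
import pandas as pd
from multiprocessing import Pool

def add_lengths(chunk):
    # per-chunk worker; a top-level function so it can be pickled
    chunk = chunk.copy()
    chunk['text_len'] = chunk['text'].str.len()
    return chunk

if __name__ == '__main__':
    df = pd.DataFrame({'text': ['a', 'bb', 'ccc', 'dddd', 'eeeee']})
    df_split = np.array_split(df, 4)      # n_jobs chunks, as in parallel_apply above
    with Pool(2) as pool:                 # n_cores worker processes
        out = pd.concat(pool.map(add_lengths, df_split))
    print(out)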
Code Example #15
def calculate_expected(uri, chroms, maxdis=2000000, balance=True, nproc=1):

    # B: Block Bias, constant for each copy number pair
    hic_pool = cooler.Cooler(uri)
    res = hic_pool.binsize
    maxdis = maxdis // res
    args = []
    for c in chroms:
        args.append((hic_pool, c, maxdis, balance))

    # Allocate processes
    if nproc == 1:
        results = list(map(_expected_core, args))
    else:
        pool = Pool(nproc)
        results = pool.map(_expected_core, args)
        pool.close()
        pool.join()

    expected = {}
    for i in range(1, maxdis + 1):
        nume = 0
        denom = 0
        for extract in results:
            if i in extract:
                nume += extract[i][0]
                denom += extract[i][1]
        if nume > 0:
            expected[i] = nume / denom

    return expected
Code Example #16
    def fit(self, frame=None, bootstrap=False, n_iter=200):
        """
        Fit Exponential Model
        """

        if not isinstance(frame, pd.core.frame.DataFrame):
            frame = self.infections.copy()
        if bootstrap:
            p = Pool()
            bootstrapped_lams = p.map(self.fit, bootstrap_frame(frame, n_iter))

        # generate durations and initial guess
        l1_d, l2_d, durations = self.GetInfectionDurations(frame)
        lam = np.random.random()

        # run minimization of negative log likelihood
        opt = minimize(decay_function,
                       lam,
                       args=(l1_d, l2_d),
                       method='L-BFGS-B',
                       bounds=((1e-6, None), ))
        self.optimizers.append(opt)
        self.estimated_lam = opt.x[0]

        if bootstrap:
            self.bootstrapped_lams = np.array(bootstrapped_lams)
            return (self.estimated_lam, self.bootstrapped_lams)
        else:
            return self.estimated_lam
Code Example #17
def main():
    xtcfp, top, sel, outp, T, dt, cf = parse_args()

    if os.path.isfile(xtcfp) and xtcfp.endswith(".xtc"):
        #seq = cal_rmsdmatrix(xtcfp, cf, top, sel, dt)
        assign_(xtcfp, cf, top, sel, dt=dt)
    else:
        if not os.path.exists(outp):
            os.mkdir(outp)
        cwd0 = os.getcwd()
        os.chdir(outp)
        if xtcfp.endswith("pkl3") or xtcfp.endswith("pkl") or xtcfp.endswith(
                "pkl2"):
            meta = pd.read_pickle(xtcfp)
            xtcfs = meta["traj_fn"].to_numpy()
            top = meta["top_fn"].iloc[0]
        else:
            xtcfs = [
                os.path.join(xtcfp, _) for _ in os.listdir(xtcfp)
                if _.endswith(".xtc")
            ]
        n_trajs = len(xtcfs)
        dtrajs = []
        if T > 1:
            pool = Pool(T)
            args = [(xtcfs[i], cf, top, sel, dt) for i in range(n_trajs)]
            dtrajs = pool.map(parallel, args)
        else:
            for i in range(n_trajs):
                dtraj = assign_(xtcfs[i], cf, top, sel, dt=dt, outfname=None)
                dtrajs.append(dtraj)
        os.chdir(cwd0)
        pd.to_pickle(dtrajs, "dtrajs.pkl3")
Code Example #18
    def run_parallel_fep(self, mutant_params, system_idx, mutant_idx, n_steps,
                         n_iterations, windows):
        logger.debug('Computing FEP for {}...'.format(self.name))
        if not self.opt:
            mutant_systems = mutant_params.build_fep_systems(
                system_idx, mutant_idx, windows)
        else:
            mutant_systems = mutant_params

        nstates = len(mutant_systems)
        chunk = math.ceil(nstates / self.num_gpu)
        groups = grouper(range(nstates), chunk)
        pool = Pool(processes=self.num_gpu)

        system = copy.deepcopy(self.wt_system)
        box_vectors = self.input_pdb.topology.getPeriodicBoxVectors()
        system.setDefaultPeriodicBoxVectors(*box_vectors)
        system.addForce(
            mm.MonteCarloBarostat(1 * unit.atmospheres,
                                  self.temperature * unit.kelvin, 25))  ###

        fep = partial(run_fep,
                      sim=self,
                      system=system,
                      pdb=self.extended_pdb,
                      n_steps=n_steps,
                      n_iterations=n_iterations,
                      all_mutants=mutant_systems)
        u_kln = pool.map(fep, groups)
        pool.close()
        pool.join()
        pool.terminate()
        ddg = FSim.gather_dg(self, u_kln, nstates)

        return ddg
Code Example #19
 def get_new_tickets(self, from_time=utils.pre_day_to_string(1)):
     search_conditions = {
         "skip": 0,
         "query": {
             "ctimeGte": "{}T21:00:00.000Z".format(from_time)
         }
     }
     pool_size = multiprocess.cpu_count()
     pool_volume = 10 * pool_size
     index = 0
     tickets_num = self._get_number_of_tickets(from_time, to_time)
     req_num = utils.ceil_division(tickets_num, 1000)
     pool = Pool(pool_size)
     for req_count in range(req_num):
         search_tickets = self.search_tickets(search_conditions)
         while True:
             tickets = pool.map(
                 self.add_attr_to_ticket,
                 itertools.islice(search_tickets, pool_volume))
             if tickets:
                 print('Downloaded {}/{} tickets'.format(
                     index, tickets_num),
                       end='\r')
                 index += pool_volume
                 yield tickets
             else:
                 break
         search_conditions['skip'] += 1000
Code Example #20
def main():
    model = Doc2Vec.load("/usr/local/apsis/queries/title_model")
    do_shingle = True
    print(scopus_doc.objects.filter(TI__isnull=True).count())
    if do_shingle:
        unshingled_s_docs = scopus_doc.objects.filter(shingle__exists=False)
        print(unshingled_s_docs.count())
        for sd in unshingled_s_docs:
            if not hasattr(sd, 'TI'):
                print(sd)
                sd.delete()
            else:
                #try:
                sd.shingle = list(shingle(sd.TI, 2))
                sd.save()
            #except:
            #    pass

    scopus_docs_all = scopus_doc.objects.filter(shingle__exists=True,
                                                DO__exists=True,
                                                doc2vec_checked=False)
    s_docs_i = scopus_docs_all.count()

    #s_docs_i = 10

    chunk_size = 3

    #similarity.objects.all().delete()

    for i in range(s_docs_i // chunk_size + 1):
        print(i)
        #t0 = time.time()
        f = i * chunk_size
        l = (i + 1) * chunk_size
        if l > s_docs_i:
            l = s_docs_i - 1
        s_docs = scopus_docs_all[f:l]

        print(s_docs)

        # initialise an empty list, and append sim items to it in parallel
        sims = []
        pool = Pool(processes=chunk_size)
        sims.append(pool.map(partial(compare, model=model), s_docs))
        pool.terminate()

        #sims = [item for sublist in sims for item in sublist]
        #try:
        #    sims = [item for sublist in sims for item in sublist]
        #except:
        #    pass

        # Flatten and remove nones
        #print(sims)
        sims = flatten(sims)
        sims = list(filter(None.__ne__, sims))

        similarity.objects.insert(sims)

        s_docs.update(doc2vec_checked=True)
Code Example #21
File: citation_matrix.py Project: JonathanBusch/tmv
    def handle(self, *args, **options):
        qid = options['qid']

        q = Query.objects.get(pk=qid)
        docs = Doc.objects.filter(query=q,
                                  wosarticle__cr__isnull=False,
                                  cdo__citation__isnull=True)

        docs = docs

        ndocs = docs.count()

        print(ndocs)

        # Chunk size, so as to prevent overuse of memory
        chunk_size = 1000

        for i in range(ndocs // chunk_size + 1):
            cdos = []
            f = i * chunk_size
            print(f)
            l = (i + 1) * chunk_size
            if l > ndocs:
                l = ndocs - 1
            chunk_docs = docs[f:l]
            pool = Pool(processes=4)
            cdos.append(pool.map(doc_cites, chunk_docs))
            pool.terminate()
            gc.collect()

            django.db.connections.close_all()
            cdos = flatten(cdos)

            CDO.objects.bulk_create(cdos)
Code Example #22
def find_day_series(df,
                    day,
                    tol,
                    min_occurrence,
                    is_departure: bool,
                    num_procs=1):

    df_day = df[df["week day"] == day].copy()
    date_num = dict(
        zip(np.sort(df_day.day.unique()), range(len(df_day.day.unique()))))
    df_day["day_num"] = df_day.day.apply(lambda d: date_num[d])
    series = df_day.series.unique()
    len_tot = series.shape[0]
    len_slice = len_tot // num_procs
    split_series = [i * len_slice for i in range(num_procs)] + [len_tot]
    split_flights = tuple([(series[split_series[i]:split_series[i + 1]],
                            df_day[df_day.series.isin(
                                series[split_series[i]:split_series[i + 1]])],
                            tol, min_occurrence, is_departure)
                           for i in range(num_procs)])

    pool = Pool(num_procs)
    result = pool.map(compute_series, split_flights)
    final_df = pd.concat(result, ignore_index=True)
    pool.close()
    pool.join()

    return final_df
Code Example #23
    def _train_batch_parallelize(self, trees, n_incorrect_answers):
        """Parallelizes training for a list of trees.
        Uses the number of threads given by multiprocessing.cpu_count()

        Updates model parameters directly, and returns batch error.
        """
        # Defaults to using cpu_count() threads
        pool = Pool()
        
        def get_subbatch_deltas(_trees):
            return self._train_batch(_trees, n_incorrect_answers, 
                                     apply_learning=False)

        subbatches = utils.split(trees, n_slices=cpu_count())

        # result will be a list of tuples (error, deltas)
        result = pool.map(get_subbatch_deltas, subbatches)

        # no more processes accepted by this pool
        pool.close()   
        # Wait until mapping is completed
        pool.join()

        error = sum([r[0] for r in result])
        deltas = [r[1] for r in result]
        for (delta_Wv, delta_b, delta_We, delta_Wr) in deltas:
            self.Wv -= delta_Wv
            self.b -=  delta_b
            self.We -= delta_We
            self.Wr -= delta_Wr

        return error
Code Example #24
def main():

    #match.objects.all().delete()
    s_docs_count = scopus_doc.objects.filter(DO__exists=True,shingle__exists=True).count()
    print(s_docs_count)

    model = Doc2Vec.load("/queries/title_model")

    s_docs_count = 10025

    chunk_size= 10000

    for i in range(s_docs_count//chunk_size+1):
        f = i*chunk_size
        l = (i+1)*chunk_size-1
        if l > s_docs_count:
            l = s_docs_count-1
        s_docs = scopus_doc.objects.filter(DO__exists=True,shingle__exists=True)[f:l]

        matches = []
        sims = []
        pool = Pool(processes=16)
        #matches.append(pool.map(partial(find_match,),s_docs))
        sims.append(pool.map(partial(find_sim,),s_docs))
        pool.terminate()
        #matches = [x for x in matches[0] if x is not None]
        #matches = list(filter(None.__ne__, matches[0]))
        sims = list(filter(None.__ne__, sims[0]))
        #match.objects.insert(matches)
        similarity.objects.insert(sims)
Code Example #25
    def get_panorama(self, fname, pano_id, zoom_level=3):
        server_url = 'http://cbk%d.google.com/' % randint(0,3)
        pano_url = server_url + 'cbk?output=tile&panoid=%s&zoom=%d&x=%d&y=%d'
        zoom_sizes = {3:(7,4), 4:(13,7), 5:(26,13)}
        max_x, max_y = zoom_sizes[zoom_level]

        jobs = []
        for y in xrange(max_y):
            for x in xrange(max_x):
                tile_url = pano_url % (pano_id, zoom_level, x, y)
                jobs.append(tile_url)

        p = Pool(len(jobs))
        tiles = p.map(self.get_tile, jobs)
        p.close()

        if all(x.size for x in tiles):
            tiles = np.array(tiles)
            strips = []
            for y in xrange(max_y):
                strips.append(np.hstack(tiles[y*max_x:(y+1)*max_x,:,:,:]))
            pano = np.vstack(strips)
            pano = pano[0:1664, 0:3328]
        else:
            pano = np.array([])
        return pano
Code Example #26
File: parallel.py Project: rajeshk-mishra/dmrgpy
def pcall_mp(fun, args, cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores)  # create pool
    out = mainpool.map(fun, args)  # return list
    mainpool.terminate()
    del mainpool  # delete pool
    return out
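A minimal hedged usage sketch for pcall_mp; the helper is repeated only to keep the sketch self-contained, and the cube worker and core count are illustrative assumptions. With the standard-library Pool, fun has to be a top-level (picklable) function:

from multiprocessing import Pool

cores = 2  # module-level default referenced by pcall_mp's signature

def pcall_mp(fun, args, cores=cores):
    """Calls a function for every input in args (same helper as above)."""
    mainpool = Pool(cores)  # create pool
    out = mainpool.map(fun, args)
    mainpool.terminate()
    del mainpool  # delete pool
    return out

def cube(x):
    # top-level worker so the default pickler can send it to worker processes
    return x ** 3

if __name__ == '__main__':
    print(pcall_mp(cube, range(6)))  # [0, 1, 8, 27, 64, 125]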
Code Example #27
    def run(self):
        """
        This function reads the feature extraction filelist and creates a pool of processes to extract features
        from distinct files in parallel. It outputs one pymir3 FeatureTrack file per input file. Output is buffered
        to save memory and defer disk access.

        .. note::
            These keys are expected to be set in the experiment file:
                * ['general']['feature_extraction_filelist']
                * ['general']['scratch_directory']
                * ['feature_extraction']['output_buffer_size']
                * ['feature_extraction']['worker_extractors']

        """

        print("Running feature extraction behavior: %s" % self.name)

        # todo: use metadata file to add labels to track metadata (if available)
        # must ensure the label is in the metadata to make life easier, instead of using the file name (probably not needed)

        with open(self.params['general']['feature_extraction_filelist']) as f:
            files = f.read().splitlines()

        # todo: use a multiprocessing.Manager to share the buffer (instead of doing it in chunks, as below)

        metas = copy.copy(files)
        files = []
        for i in metas:
            files.append(i.split("\t")[0])
        metas = []

        num_files = len(files)
        output_buffer_size = self.params['feature_extraction']['output_buffer_size']

        pool = Pool(processes=self.params['feature_extraction']['worker_extractors'])
        for i in range(0, num_files, output_buffer_size):
            print "processing files %d through %d of %d" % (i + 1, min(i + output_buffer_size, num_files), num_files)
            result = pool.map(self.extract, files[i:min(i + output_buffer_size, num_files)])

            T0 = time.time()
            for track in result:
                filename = acf_utils.extract_filename(track.metadata.filename, "wav") + ".features"
                filename = self.params['general']['scratch_directory'] + "/" + filename

                print "writing features to file %s..." % (filename)
                feature_file = open(filename, "w")
                track.save(feature_file)
                feature_file.close()
                del track
            T1 = time.time()
            print "writing feature files to disk took %f seconds" % (T1 - T0)

            del result
            gc.collect()

        pool.close()
        pool.join()

        print ('Feature extraction done!')
Code Example #28
def pcall_mp(fun, args, cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores)  # create pool
    #    print("Using",cores,"cores")
    out = mainpool.map(fun, args)  # return list
    mainpool.terminate()  # clear the pool
    del mainpool  # delete pool
    return out
Code Example #29
def ospf_check():
    clear_log()
    devices = [x.split(',')[0] for x in open(devicesFile)]
    pool = Pool(processor)
    lock = Manager().Lock()
    list(pool.map(partial(_inf_ospf_check, lock), devices))
    pool.close()
    pool.join()
Code Example #30
File: parallel.py Project: joselado/pygra
def pcall_mp(fun,args,cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores) # create pool
#    print("Using",cores,"cores")
    out = mainpool.map(fun,args) # return list
    mainpool.terminate()
    del mainpool # delete pool
    return out
Code Example #31
def Multiprocessed_OCRPDF(
        source="",
        targetPath=None,
        processes=4,
        nice=5,
        verbose=False,
        tesseract_config='--oem 1 -l best/eng -c preserve_interword_spaces=1 textonly_pdf=1',
        logger=None):

    if isinstance(source, str):
        if verbose:
            (
                logger.info if logger else print
            )("You passed a string in as source. Trying this as source pdf file path."
              )
        page_count = PyPDF2.PdfFileReader(source).getNumPages()
    else:
        if verbose:
            (logger.info if logger else
             print)("OCRUSREX - Try extracting Images from bytes object")
        page_count = PyPDF2.PdfFileReader(io.BytesIO(source)).getNumPages()

    output = PyPDF2.PdfFileWriter()

    # set up a multiprocess pool with the specified number of processes. Then call the single-threaded OCRPDF method
    # on each page
    p = Pool(processes)
    for ocred_page in p.map(
            lambda p: OCRPDF(source=source,
                             verbose=verbose,
                             nice=nice,
                             page=p + 1,
                             tesseract_config=tesseract_config,
                             logger=logger), range(0, page_count)):
        output.addPage(PyPDF2.PdfFileReader(io.BytesIO(ocred_page)).getPage(0))

    if verbose:
        (logger.info if logger else print)("Multithreaded Execution Complete!")

    # If targetPath was provided, assume that it's a string and valid path. Try to write.
    if targetPath:
        outputStream = open(targetPath, "wb")
        output.write(outputStream)
        outputStream.close()
        # upon success, return truthy values (in this case, True)
        return True

    # otherwise, return results as bytes obj
    else:
        output_file_obj = io.BytesIO()
        output.write(output_file_obj)
        return output_file_obj.getvalue()

    if verbose:
        (logger.info if logger else print)(
            "Complete! Elapsed time: {0}".format(end - start))
Code Example #32
def get_gameplays():

    PlayTypeDict = {}

    PlayTypeStrings = {
        'Pass': ['pass incomplete', 'pass complete', 'sacked'],
        'Admin': ['spiked the ball', 'Timeout', 'Penalty', 'aborted'],
        'Kneel': ['knee', 'knelt'],
        'Punt': ['Punts'],
        'Field Goal': ['field goal', 'no good'],
        'Special Teams':
        ['kicks off', 'kicks onside', 'extra point', 'two point'],
        'Run': [
            'left end', 'right end', ' for ', 'up the middle', 'middle for',
            'left tackle', 'left guard', 'right guard', 'right tackle'
        ],
    }

    YearStart = 1998
    YearsToGo = 20
    for Year in range(YearStart, YearStart + YearsToGo):

        PlayTypeCounts = {
            'Pass': 0,
            'Run': 0,
            'Punt': 0,
            'Field Goal': 0,
            'Admin': 0,
            'Kneel': 0,
            'Special Teams': 0
        }
        for GameNumber in range(1, 17):
            print('Game', GameNumber, 'in', Year, 'Time: ', datetime.now())

            PlayTypeDict = {}
            PathList = []
            for Team in TeamLookup:
                for GameLocation in ['H', 'A']:
                    path = 'https://widgets.sports-reference.com/wg.fcgi?css=1&site=pfr&url=%2Fplay-index%2Fplay_finder.cgi%3Frequest%3D1%26match%3Dall%26year_min%3D{YEAR}%26year_max%3D{YEAR}%26game_type%3DR%26game_num_min%3D{GameNumber}%26game_num_max%3D{GameNumber}%26week_num_min%3D0%26week_num_max%3D99%26game_location%3D{GameLocation}%26minutes_max%3D15%26seconds_max%3D0%26minutes_min%3D0%26seconds_min%3D0%26team_id%3D{TEAM}%26field_pos_min_field%3Dteam%26field_pos_max_field%3Dteam%26end_field_pos_min_field%3Dteam%26end_field_pos_max_field%3Dteam%26type%255B%255D%3DPASS%26type%255B%255D%3DRUSH%26type%255B%255D%3DPUNT%26type%255B%255D%3DKOFF%26type%255B%255D%3DONSD%26type%255B%255D%3DFG%26type%255B%255D%3DXP%26type%255B%255D%3D2PC%26no_play%3DN%26turnover_type%255B%255D%3Dinterception%26turnover_type%255B%255D%3Dfumble%26score_type%255B%255D%3Dtouchdown%26score_type%255B%255D%3Dfield_goal%26score_type%255B%255D%3Dsafety%26order_by%3Dyds_to_go&div=div_all_plays&del_col=1,11,12,13,14'.format(
                        YEAR=Year,
                        GameNumber=GameNumber,
                        TEAM=Team,
                        GameLocation=GameLocation)

                    PathList.append(path)
                    #req = get(path)
            p = Pool(8)  # Pool tells how many at a time
            records = p.map(GetAndParsePath, PathList)
            p.terminate()
            p.join()

            with open(
                    'output/PlayTypeCounts-Year-' + str(Year) + '-Game-' +
                    str(GameNumber) + '.json', 'w') as outfile:
                json.dump(PlayTypeDict, outfile)
Code Example #33
File: olt.py Project: sjava/olt
def zte_gpon_svlan_check():
    clear_log()
    nodes = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)")
    olts = ((x[0], x[1]) for x in nodes)
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])
    pool = Pool(8)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, lzte_gpon_svlan), olts))
    pool.close()
    pool.join()
Code Example #34
File: switch.py Project: sjava/olt
def interface_check_m():
    clear_log()
    #  cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    cmd = "match(s: Switch)  return s.ip, s.model"
    #  cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    out_inf = partial(output_interface_m, lock)
    list(pool.map(compose(out_inf, get_interface), switchs))
    pool.close()
    pool.join()
Code Example #35
File: olt.py Project: sjava/olt
def svlan_check():
    clear_log()
    #  nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='company', property_value='zte')
    olts = [(x['ip'], x['company'], x['area']) for x in nodes]
    #  list(map(compose(card_entry, get_card), olts))
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, get_svlan), olts))
    pool.close()
    pool.join()
Code Example #36
File: olt.py Project: sjava/weihu
def add_infs():
    funcs = {'zte': Zte.get_infs, 'hw': Huawei.get_infs}
    get_infs = partial(_company, funcs)

    clear_log()
    nodes = graph.cypher.execute(
        'match (n:Olt) return n.ip as ip,n.company as company')
    olts = [dict(ip=x['ip'], company=x['company']) for x in nodes]
    pool = Pool(128)
    lock = Manager().Lock()
    _add_infs_p = partial(_add_infs, lock)
    list(pool.map(compose(_add_infs_p, get_infs), olts))
    pool.close()
    pool.join()
Code Example #37
File: makeallinputs.py Project: mhearne-usgs/slab2
def main(args):

    filedate = args.filedate
    database = args.database

    slablist = ['alu','cal','cam','car','cas','cot','hal','hel','him','hin','izu','jap','ker','kur','mak','man','mue','pam','png','phi','puy','ryu','sam','sco','sol','sul','sum','van']

    indices = range(len(slablist))
    pool1 = Pool(args.nCores)
    partial_loop1 = partial(calls2d, database, filedate, slablist)

    pts = pool1.map(partial_loop1, indices)
    pool1.close()
    pool1.join()
Code Example #38
File: olt.py Project: sjava/olt
def hostname_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()
    ip_hostname = (x.split(',') for x in open(result_file))
    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], hostname=x[1]), ip_hostname))
Code Example #39
File: tools.py Project: sjava/webapp
def get_vlan_usersP(bras):
    def _get_vlan_users(bas):
        funcs = {'m6k': M6k.get_vlan_users,
                 'me60': ME60.get_vlan_users}
        _gvu = partial(_model, funcs)
        return _gvu(bas)

    bras = [dict(ip=x[0], model=x[1], inf=x[2])
            for x in bras]
    pool = Pool(len(bras))
    temp = pool.map(_get_vlan_users, bras)
    pool.close()
    pool.join()
    temp = [x[1] for x in temp if x[1]]
    rslt = reduce(lambda x, y: merge_with(sum, x, y), temp)
    return rslt
Code Example #40
File: core.py Project: XuChongBo/pydemo
 def calculate(self, data):
     t1 = dt.datetime.utcnow()
     LOGGER.info('Starting calculation...')
     self._data = deepcopy(data)
     self._check_inputs(data)
     dep = self._dependencies()
     sorted_dep = topological_sort(dep)
     for items in sorted_dep:
         # loading node with inputs
         for item in items:
             node = self._get_node(item)
             args = [i_name for i_name in node.input_names if i_name not in node.kwargs]
             data_to_pass = []
             for arg in args:
                 data_to_pass.append(self._data[arg])
             kwargs_to_pass = {}
             for kwarg in node.kwargs:
                 kwargs_to_pass[kwarg] = self._data[kwarg]
             node.load_inputs(data_to_pass, kwargs_to_pass)
         # running nodes
         if self._parallel:
             pool = Pool(self._pool_size)
             results = pool.map(
                 Graph.run_node,
                 [self._get_node(i) for i in items]
             )
             pool.close()
             pool.join()
             results = {k: v for k, v in results}
         else:
             results = {}
             for item in items:
                 node = self._get_node(item)
                 res = node.run_with_loaded_inputs()
                 results[node.id] = res
         # save results
         for item in items:
             node = self._get_node(item)
             res = results[node.id]
             if len(node.output_names) == 1:
                 self._data[node.output_names[0]] = res
             else:
                 for i, out in enumerate(node.output_names):
                     self._data[out] = res[i]
     t2 = dt.datetime.utcnow()
     LOGGER.info('Calculation finished in {}'.format(t2-t1))
     return res
Code Example #41
File: olt.py Project: sjava/olt
def zhongji_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()
    ports = (x.split(',') for x in open(result_file))
    cmd = """match(n: Olt) where n.ip = {ip} 
    merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) 
    merge(m) - [:Include]->(p: Port{name: {interface}})"""
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], sm=x[1], interface=x[2]), ports))
Code Example #42
def parallel_cdist(data1, data2, n_rows_per_job=100):

    from scipy.spatial.distance import cdist

    data1 = np.array(data1)
    data2 = np.array(data2)

    pool = Pool(12)

    start_indices = np.arange(0, data1.shape[0], n_rows_per_job)
    end_indices = start_indices + n_rows_per_job - 1

    partial_distance_matrices = pool.map(lambda (si, ei): cdist(data1[si:ei+1].copy(), data2), zip(start_indices, end_indices))
    pool.close()
    pool.join()

    distance_matrix = np.concatenate(partial_distance_matrices)
    return distance_matrix
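The lambda above uses Python 2 tuple-parameter syntax, so it will not parse on Python 3, and a plain multiprocessing.Pool could not pickle a lambda anyway. A hedged Python 3 sketch of the same row-chunked cdist pattern using a top-level worker (names and sizes are illustrative assumptions):

import numpy as np
from functools import partial
from multiprocessing import Pool
from scipy.spatial.distance import cdist

def _cdist_rows(bounds, data1, data2):
    # distances for one horizontal slice of data1 against all of data2
    si, ei = bounds
    return cdist(data1[si:ei], data2)

def parallel_cdist_py3(data1, data2, n_rows_per_job=100, processes=4):
    data1 = np.asarray(data1)
    data2 = np.asarray(data2)
    starts = np.arange(0, data1.shape[0], n_rows_per_job)
    bounds = [(si, min(si + n_rows_per_job, data1.shape[0])) for si in starts]
    with Pool(processes) as pool:
        parts = pool.map(partial(_cdist_rows, data1=data1, data2=data2), bounds)
    return np.concatenate(parts)

if __name__ == '__main__':
    a = np.random.rand(250, 3)
    b = np.random.rand(40, 3)
    print(parallel_cdist_py3(a, b).shape)  # (250, 40)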
Code Example #43
File: problem_risk.py Project: ttinoco/GRIDOPT
    def eval_EFG(self,x,num_procs=None,info=False):

        from multiprocess import Pool,cpu_count

        if not num_procs:
            num_procs = cpu_count()
        num_samples = self.parameters['num_samples']
        pool = Pool(num_procs)
        num = int(np.ceil(float(num_samples)/float(num_procs)))
        results = list(zip(*pool.map(lambda i: self.eval_EFG_sequential(x,num,i,info),range(num_procs),chunksize=1)))
        pool.terminate()
        pool.join()
        if not info:
            assert(len(results) == 4)
        else:
            assert(len(results) == 5)
        assert(all([len(vals) == num_procs for vals in results]))
        return [sum(vals)/float(num_procs) for vals in results]
Code Example #44
File: switch.py Project: sjava/weihu
def add_power_info():
    funcs = {'S8508': S85.get_power_info,
             'S8505': S85.get_power_info,
             'T64G': T64.get_power_info,
             'S8905': S89.get_power_info,
             'S8905E': S8905E.get_power_info,
             'S9306': S93.get_power_info,
             'S9303': S93.get_power_info}
    get_power_info = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model")
    switches = [dict(ip=x['ip'], model=x['model']) for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_power_info, lock)
    list(pool.map(compose(_ff, get_power_info), switches))
    pool.close()
    pool.join()
Code Example #45
File: switch.py Project: sjava/weihu
def add_traffics():
    funcs = {'S8508': S85.get_traffics,
             'S8505': S85.get_traffics,
             'T64G': T64.get_traffics,
             'S8905': S89.get_traffics,
             'S8905E': S8905E.get_traffics,
             'S9306': S93.get_traffics,
             'S9303': S93.get_traffics}
    get_traffics = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model")
    switchs = [dict(ip=x['ip'], infs=x['infs'], model=x['model'])
               for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_traffics, lock)
    list(pool.map(compose(_ff, get_traffics), switchs))
    pool.close()
    pool.join()
Code Example #47
File: problem.py Project: ttinoco/GRIDOPT
    def eval_EQ(self,p,num_procs=None,quiet=True):
        """
        Evaluates E[Q(p,r)] and its gradient in parallel. 

        Parameters
        ----------
        p : generator powers
        num_procs : number of parallel processes
        quiet : flag
        """
       
        from multiprocess import Pool,cpu_count
 
        if not num_procs:
            num_procs = cpu_count()
        num_samples = self.parameters['num_samples']
        pool = Pool(num_procs)
        num = int(np.ceil(float(num_samples)/float(num_procs)))
        results = list(zip(*pool.map(lambda i: self.eval_EQ_sequential(p,num,i,quiet),range(num_procs),chunksize=1)))
        pool.terminate()
        pool.join()
        assert(len(results) == 2)
        assert(all([len(vals) == num_procs for vals in results]))
        return [sum(vals)/float(num_procs) for vals in results]
Code Example #49
File: crop.py Project: mistycheney/MouseBrainAtlas
        else:
            raise

#             input_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=5, version=version, resol='raw')
        out_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=2, resol=resol, version=version)
        print 'out_dir:', out_dir
#             script = os.path.join(REPO_DIR, 'preprocess', 'warp_crop_IM_v3.py')

#         ! rm -rf {out_dir}
        create_if_not_exists(out_dir)

        t = time.time()

        pool = Pool(8)
        _ = pool.map(lambda img_name: crop(stack=stack, img_name=img_name, version=version, resol=resol, 
                                        x=x, y=y, w=w, h=h), 
                     metadata_cache['valid_filenames'][stack])
        pool.close()
        pool.join()

#             for img_name in metadata_cache['valid_filenames'][stack]:
#                 f(stack=stack, img_name=img_name, version=version, resol=resol, 
#                                             x=x, y=y, w=w, h=h)

    #     run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d  \"%%(output_fp)s\"' % \
    #                     {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
    #                     kwargs_list=[{'input_fp': DataManager.get_image_filepath_v2(stack=stack, prep_id=5, resol='raw', version=version, fn=img_name),
    #                                   'output_fp': DataManager.get_image_filepath_v2(stack=stack, fn=img_name, prep_id=2, version=version, resol='raw')}
    #                                  for img_name in metadata_cache['valid_filenames'][stack]],
    # #                                  for img_name in ['CHATM3_slide35_2018_02_17-S1']],
    #                     argument_type='single',
Code Example #50
                ntb_to_nissl[ntb_v] = np.unique(a)[0]

        ntb_values = np.arange(0, 5000)
        ntb_matched_values = np.interp(ntb_values, 
                                       [ntb_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())], 
                                       [nissl_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())])
    
        sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time()-t))
        
        return ntb_matched_values, (region1_x, region1_y, region1_w, region1_h)
        
    
    n_regions = 8
    
    pool = Pool(4)
    res = pool.map(f, range(n_regions))
    ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
    pool.close()
    pool.join()
    
#     for region_id in range(10):
        
#         while True:
#             region1_x = np.random.randint(0, w-10000, 1)[0]
#             region1_y = np.random.randint(0, h-10000, 1)[0]
#             region1_w = 5000
#             region1_h = 5000
#             print region1_x, region1_y, region1_w, region1_h
            
#             tb_region1_xmin = region1_x / 32
#             tb_region1_xmax = (region1_x + region1_w) / 32
Code Example #51
    
        ntb_blue_inv_bins = np.arange(5001)
        ntb_inv_to_nissl_mapping = np.interp(ntb_blue_inv_bins, ntb_inv_vals, nissl_vals)
        
        ntb_to_nissl_mapping = ntb_inv_to_nissl_mapping[5000 - ntb_blue_bins]
        ntb_to_nissl_mapping = np.round(ntb_to_nissl_mapping).astype(np.uint8)
                        
        ntb_matched_values_all_examples_one_section.append(ntb_to_nissl_mapping)
        region_bboxes_all_examples_one_section.append((region1_x, region1_y, region1_w, region1_h))
    
        sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time()-t))
        
        return ntb_to_nissl_mapping, (region1_x, region1_y, region1_w, region1_h)
            
    pool = Pool(4)
    res = pool.map(match_intensity_histogram_one_region, regions)
    ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
    pool.close()
    pool.join()
    

    fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping', '%s_to_%s_intensity_mapping_all_regions.npy' % (ntb_fn, nissl_fn))
    create_parent_dir_if_not_exists(fp)
    np.save(fp, np.asarray(ntb_matched_values_all_examples_one_section))
    upload_to_s3(fp)

    fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping', '%s_to_%s_region_bboxes.npy' % (ntb_fn, nissl_fn))
    np.save(fp, np.asarray(region_bboxes_all_examples_one_section))
    upload_to_s3(fp)

    median_mapping_one_section = np.median(ntb_matched_values_all_examples_one_section, axis=0)
Code Example #53
def compute_spm_histograms(labelmap, sample_locs, patch_size, M):
    """
    Args:
        labelmap (2d-ndarray of int):
        sample_locs (2d-ndarray): List of (x,y) locations at which to sample the SPM histograms
        M (int): number of unique SIFT descriptor words, aka. size of vocabulary
        
    Returns:
        hists_arr0 ((1,M)-array of int)
        hists_arr1 ((4,M)-array of int)
        hists_arr2 ((16,M)-array of int)
    """

    global labelmap_global
    labelmap_global = labelmap

    # compute level-2 histograms
    l = 2

    grid_size = patch_size / 2**l

    if l == 2:
        rx = [-2, -1, 0, 1]
        ry = [-2, -1, 0, 1]
    elif l == 1:
        rx = [-1, 0]
        ry = [-1, 0]
    elif l == 0:
        rx = [-.5]
        ry = [-.5]

    rxs, rys = np.meshgrid(rx, ry)

    patch_coords_allGrid = []

    for grid_i, (rx, ry) in enumerate(np.c_[rxs.flat, rys.flat]):

        patch_xmin = sample_locs[:,0] + rx * grid_size
        patch_ymin = sample_locs[:,1] + ry * grid_size
        patch_xmax = sample_locs[:,0] + (rx + 1) * grid_size
        patch_ymax = sample_locs[:,1] + (ry + 1) * grid_size

        patch_coords_allGrid.append([patch_xmin, patch_ymin, patch_xmax, patch_ymax])


    all_coords = np.hstack(patch_coords_allGrid)
    patch_xmin = all_coords[0]
    patch_ymin = all_coords[1]
    patch_xmax = all_coords[2]
    patch_ymax = all_coords[3]

    def compute_histogram_particular_label(i):
        m = (labelmap_global == i).astype(np.uint8)
        mi = cv2.integral(m)
        ci = mi[patch_ymin, patch_xmin] + mi[patch_ymax, patch_xmax] - mi[patch_ymax, patch_xmin] - mi[patch_ymin, patch_xmax]
        return ci

    t = time.time()
    # hists = Parallel(n_jobs=16)(delayed(compute_histogram_particular_label)(i) for i in range(1, M+1))
    # hists = Parallel(n_jobs=8)(delayed(compute_histogram_particular_label)(i) for i in range(1, M+1))
    pool = Pool(8)
    hists = pool.map(compute_histogram_particular_label, range(1, M+1))
    # pool.terminate()
    pool.close()
    pool.join()
    # del pool
    sys.stderr.write('done in %f seconds\n' % (time.time() - t)) # ~ 13 seconds

    n_grid = (2**l)**2
    hists_arr2 = np.transpose(np.reshape(hists, (M, n_grid, -1)))
    print hists_arr2.shape

    # compute level-1 histograms based on level-2 histograms

    hists_arr1 = np.transpose([hists_arr2[:, [0,1,4,5], :].sum(axis=1),
                               hists_arr2[:, [2,3,6,7], :].sum(axis=1),
                               hists_arr2[:, [8,9,12,13], :].sum(axis=1),
                               hists_arr2[:, [10,11,14,15], :].sum(axis=1)],
                              [1,0,2])
    print hists_arr1.shape

    # compute level-0 histograms based on level-1 histograms

    hists_arr0 = hists_arr1.sum(axis=1)
    print hists_arr0.shape

    return hists_arr0, hists_arr1, hists_arr2
Code Example #54
    def save_scoremap(structure):
        viz_fp = DataManager.get_scoremap_viz_filepath(stack=stack, downscale=downscale, fn=fn, structure=structure, detector_id=detector_id)
        create_parent_dir_if_not_exists(viz_fp)
        
        try:
            if add_label_text:
                label_text = str(structure)
            else:
                label_text = None
            viz = scoremap_overlay_on(bg='original', stack=stack, fn=fn, structure=structure,
                                out_downscale=downscale, label_text=label_text, detector_id=detector_id,
                                     cmap_name=cmap_name, image_version=bg_image_version)
            imsave(viz_fp, img_as_ubyte(viz))
            upload_to_s3(viz_fp)
        except Exception as e:
            # raise e
            sys.stderr.write('%s\n' % e.message)
            return

    # for s in all_known_structures:
        # save_scoremap(s)

    pool = Pool(NUM_CORES/2)
    pool.map(save_scoremap, all_known_structures)
    pool.close()
    pool.join()

    sys.stderr.write('Visualize scoremaps: %.2f seconds.\n' % (time.time() - t))
    # 7s for one structure, one section, single process
    # 20s for all structures, one section, 8 processes
Code Example #55
File: resize.py Project: mistycheney/MouseBrainAtlas
if hasattr(args, "rescale_factor"):
    rescale_factor = args.rescale_factor
else:
    w = args.width
    h = args.height

n_jobs = args.jobs

def worker(img_name):

    input_fp = input_fp_map[img_name]
    output_fp = output_fp_map[img_name]
    create_parent_dir_if_not_exists(output_fp)

    img = imread(input_fp)
    save_data(img[::int(1/rescale_factor), ::int(1/rescale_factor)], output_fp)


pool = Pool(n_jobs)
_ = pool.map(worker, in_image_names)
pool.close()
pool.join()

# run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d  \"%%(output_fp)s\"' % \
#                 {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
#                 kwargs_list=[{'input_fp': ,
#                               'output_fp': output_fp_map[img_name]}
#                              for img_name in metadata_cache['valid_filenames'][stack]],
#                 argument_type='single',
#                jobs_per_node=1,
#                local_only=True)
Code Example #56
structure_colors = {n: np.random.randint(0, 255, (3,)) for n in all_known_structures}

def generate_annotation_viz_one_section(stack, fn, structure_colors=structure_colors, downsample_factor=downsample_factor):
    global contours
    
    if is_invalid(fn):
        return
    
    img_fp = DataManager.get_image_filepath(stack=stack, fn=fn, resol='lossless', version='compressed')
    download_from_s3(img_fp)
    img = imread(img_fp)
    viz = img[::downsample_factor, ::downsample_factor].copy()
    
    for name_u, color in structure_colors.iteritems():
        matched_contours = contours[(contours['name'] == name_u) & (contours['filename'] == fn)]
        for cnt_id, cnt_props in matched_contours.iterrows():
            cv2.polylines(viz, [(cnt_props['vertices']/downsample_factor).astype(np.int)], True, color, 2)
    
    viz_fp = DataManager.get_annotation_viz_filepath(stack=stack, fn=fn)
    create_parent_dir_if_not_exists(viz_fp)
    imsave(viz_fp, viz)
    upload_to_s3(viz_fp)

# for fn in filenames:
#     generate_annotation_viz_one_section(fn=fn)

pool = Pool(NUM_CORES/2)
pool.map(lambda fn: generate_annotation_viz_one_section(stack=stack, fn=fn, structure_colors=structure_colors, downsample_factor=downsample_factor), filenames)
pool.close()
pool.join()
Code Example #57
#     sys.stderr.write('Compute saturation: %.2f seconds\n' % (time.time() - t1)) # skimage 6.5s; opencv 5s


def generate_versions(fn, which):

    input_fn=os.path.join(input_dir, fn)
    basename = os.path.splitext(os.path.basename(fn))[0]

    if 'compressed' in which:
        output_compressed_fn = os.path.join(output_compressed_dir, basename + '_compressed.jpg')
        if 'compressed' in which:
            if os.path.exists(output_compressed_fn):
                sys.stderr.write('File exists: %s.\n' % output_compressed_fn)
            else:
                os.system("convert %(input_fn)s -format jpg %(output_compressed_fn)s" % \
                    dict(input_fn=input_fn, output_compressed_fn=output_compressed_fn))

    if 'saturation' in which:
        # output_saturation_fn = os.path.join(output_saturation_dir, basename + '_saturation.jpg') # why jpg?
        output_saturation_fn = os.path.join(output_saturation_dir, basename + '_saturation.tif')
        if os.path.exists(output_saturation_fn):
            sys.stderr.write('File exists: %s.\n' % output_saturation_fn)
        else:
            convert_to_saturation(input_fn, output_saturation_fn, rescale=True)

#Parallel(n_jobs=4)(delayed(generate_versions)(fn, which) for fn in filenames)
pool = Pool(4)
pool.map(lambda fn: generate_versions(fn, which), filenames)
pool.close()
pool.join()
Code Example #58
    for iy, y0 in enumerate(np.arange(0, img_h, 5000)):
        for ix, x0 in enumerate(np.arange(0, img_w, 5000)):
            origins.append((x0, y0))

    alg = 'cellprofiler'

    big_labelmap = np.zeros((img_h, img_w), dtype=np.int64)
    n = 0
    for i, input_fp in enumerate(input_fps):
        prefix = os.path.splitext(input_fp)[0]
        labelmap = labelmap_alltiles[i].astype(np.int64) # astype(np.int64) is important, otherwise results in negative label values.
        x0, y0 = origins[i]
        big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n
        n += labelmap.max()

    labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)
    bp.pack_ndarray_file(big_labelmap, labelmap_fp)
    upload_to_s3(labelmap_fp)
    
    for fp in input_fps:
        execute_command('rm ' + fp)        

t = time.time()

pool = Pool(NUM_CORES/2)
pool.map(detect_cells, filenames)
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
Code Example #59
        big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n
        n += labelmap.max()

    labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)

    bp.pack_ndarray_file(big_labelmap, labelmap_fp)

#     for tile_i in range(12):
#         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d.tif' % \
#                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))
#         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d_labelmap_cellprofiler.bp' % \
#                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))

    # Generate labelmap viz
    t = time.time()

    viz = img_as_ubyte(label2rgb(big_labelmap, bg_label=0, bg_color=(0, 0, 0)))
    cv2.imwrite(os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.png' % dict(alg=alg), viz);

    sys.stderr.write('Generate labelmap viz: %.2f seconds.\n' % (time.time()-t)) # 60s


t = time.time()

pool = Pool(12)
pool.map(detect_cells, range(first_sec, last_sec+1))
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
Code Example #60
        dense_score_map[dense_score_map < 1e-1] = 0
        dense_score_map[dense_score_map > 1.] = 1.
#             sys.stderr.write('threshold: %.2f seconds\n' % (time.time() - t))

        if np.count_nonzero(dense_score_map) < 1e5:
            sys.stderr.write('No %s is detected on section %d\n' % (structure, sec))
            return None

        t1 = time.time()

        scoremap_bp_filepath, scoremap_interpBox_filepath = \
        DataManager.get_scoremap_filepath(stack=stack, fn=fn, anchor_fn=anchor_fn, structure=structure,
                                          return_bbox_fp=True, setting=actual_setting)

        save_hdf(dense_score_map.astype(np.float16), scoremap_bp_filepath, complevel=5)
        np.savetxt(scoremap_interpBox_filepath,
               np.array((interpolation_xmin, interpolation_xmax, interpolation_ymin, interpolation_ymax))[None],
               fmt='%d')

        sys.stderr.write('save: %.2f seconds\n' % (time.time() - t1)) # 4s, very high penalty when multiprocessing


    t = time.time()

    pool = Pool(4) # 8 causes contention, results in high upscaling and dumping to disk time.
    _ = pool.map(generate_score_map, structures)
    pool.close()
    pool.join()

    sys.stderr.write('interpolate: %.2f seconds\n' % (time.time() - t)) # ~ 30 seconds / section