Code example #1
    def performStage2TilesSegmentation(self, tilesImgDIR, tilesMaskedDIR, tilesSegsDIR, tilesSegBordersDIR, tmpDIR, tilesBase, s1BordersImage, segStatsInfo, minPxlsVal, distThresVal, bandsVal, ncpus):
        rsgisUtils = rsgislib.RSGISPyUtils()
        imgTiles = glob.glob(os.path.join(tilesImgDIR, tilesBase+"*.kea"))
        for imgTile in imgTiles:
            baseName = os.path.splitext(os.path.basename(imgTile))[0]        
            maskedFile = os.path.join(tilesMaskedDIR, baseName + '_masked.kea')
            dataType = rsgisUtils.getRSGISLibDataTypeFromImg(imgTile)
            imageutils.maskImage(imgTile, s1BordersImage, maskedFile, 'KEA', dataType, 0, 0)
            
        imgTiles = glob.glob(os.path.join(tilesMaskedDIR, tilesBase+"*_masked.kea"))
        def stage2threadedTiledImgSeg(imgTile):
            baseName = os.path.splitext(os.path.basename(imgTile))[0]
            clumpsFile = os.path.join(tilesSegsDIR, baseName + '_segs.kea')
            kMeansCentres, imgStretchStats = self.findSegStatsFiles(imgTile, segStatsInfo)
            segutils.runShepherdSegmentationPreCalcdStats(imgTile, clumpsFile, kMeansCentres, imgStretchStats, outputMeanImg=None, tmpath=os.path.join(tmpDIR, baseName+'_segstemp'), gdalformat='KEA', noStats=False, noStretch=False, noDelete=False, minPxls=minPxlsVal, distThres=distThresVal, bands=bandsVal, processInMem=False)

        p = Pool(ncpus)
        p.map(stage2threadedTiledImgSeg, imgTiles)
        
        segTiles = glob.glob(os.path.join(tilesSegsDIR, tilesBase+"*_segs.kea"))
        for segTile in segTiles:
            baseName = os.path.splitext(os.path.basename(segTile))[0]        
            borderMaskFile = os.path.join(tilesSegBordersDIR, baseName + '_segsborder.kea')
            rastergis.defineBorderClumps(segTile, 'BoundaryClumps')
            rastergis.exportCol2GDALImage(segTile, borderMaskFile, 'KEA', rsgislib.TYPE_8UINT, 'BoundaryClumps')
Code example #2
def run(non_iter_args, do_multiprocessing):
    [
        weightcalcdata, weightcalculator, box, startindex, size,
        newconnectionmatrix, method, boxindex, filename, headerline,
        writeoutput
    ] = non_iter_args

    partial_gaincalc_oneset = partial(calc_weights_oneset, weightcalcdata,
                                      weightcalculator, box, startindex, size,
                                      newconnectionmatrix, method, boxindex,
                                      filename, headerline, writeoutput)

    if do_multiprocessing:
        pool = Pool(processes=pathos.multiprocessing.cpu_count())
        pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)

        # Current solution to no close and join methods on ProcessingPool
        # https://github.com/uqfoundation/pathos/issues/46

        s = pathos.multiprocessing.__STATE['pool']
        s.close()
        s.join()
        pathos.multiprocessing.__STATE['pool'] = None

    else:
        for causevarindex in weightcalcdata.causevarindexes:
            partial_gaincalc_oneset(causevarindex)

    return None
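The __STATE workaround above predates pathos exposing pool lifecycle methods directly. For comparison, a minimal sketch of the same fan-out using close/join/clear (the pattern also used in code example #5 below), assuming a reasonably recent pathos release; run_simple is an illustrative helper, not part of the original project:

from pathos.multiprocessing import ProcessingPool as Pool

def run_simple(func, items, ncpus=4):
    # sketch only: fan func out over items, then tidy up the cached pool
    pool = Pool(ncpus)
    try:
        results = pool.map(func, items)
    finally:
        pool.close()   # stop accepting new tasks
        pool.join()    # wait for the workers to finish
        pool.clear()   # drop the cached pool so a fresh one can be created later
    return results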
Code example #3
def extract_hits(bins_to_contig_lists, outdir, contig_file, threads):
    p = Pool(threads)

    pullseq_tmp = os.path.join(outdir, 'pullseq_ids_tmp')
    if not os.path.exists(pullseq_tmp):
        os.system('mkdir ' + pullseq_tmp)

    def pullseq_by_bin(bin_name, contig_list, contig_file):
        #Generates a file with the names of all the contigs to pull out
        #then provides that to pullseq;
        #parses the resulting fasta output from pullseq and then
        #passes it back.
        with open(os.path.join(pullseq_tmp, bin_name + '.txt'),
                  'w') as outfile:
            for element in contig_list:
                outfile.writelines(element + '\n')

        os.system('pullseq -i ' + contig_file + ' -n ' +
                  os.path.join(pullseq_tmp, bin_name + '.txt') + ' > ' +
                  os.path.join(outdir, bin_name + '.fasta'))

        return

    p.map(lambda x: pullseq_by_bin(x, bins_to_contig_lists[x], contig_file),
          bins_to_contig_lists)
    #for bin in bins_to_contig_lists:
    #    pullseq_by_bin(bin, bins_to_contig_lists[bin], contig_file)

    os.system('rm -rf ' + pullseq_tmp)
    p.terminate()
    return
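Mapping a lambda that wraps the nested pullseq_by_bin function, as above, works because pathos serializes callables with dill; the standard library multiprocessing.Pool cannot pickle lambdas or nested functions. A stripped-down sketch of the same pattern with toy data (illustrative only, not part of the original script):

from pathos.multiprocessing import ProcessingPool as Pool

def demo():
    prefix = 'bin_'

    def label(name):
        # nested function capturing prefix from the enclosing scope
        return prefix + name

    p = Pool(2)
    try:
        return p.map(lambda n: label(n), ['a', 'b', 'c'])  # ['bin_a', 'bin_b', 'bin_c']
    finally:
        p.terminate()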
Code example #4
def HashBulkRead(request):
    hashes = request.POST['names']
    hashes_list = hashes.split()
    try:
        if not hashes_list:
            messages.info(request, 'Field is empty!!')
            return render(request, 'bulkfilehash.html')
        else:
            ref = create_ref_code()
            hash_length = len(hashes_list)
            # one reference code per submitted hash so checkhash can tag each record
            ref_list = [ref] * hash_length
            p = Pool(20)
            p.map(checkhash, hashes_list, ref_list)
            result_to_display = Hashes.objects.filter(
                reference=ref_list[0])
            context = {
                'data_ip': result_to_display,
                'reference': ref_list[0],
                'button': 1
            }
        return render(request, 'bulkfilehash.html', context)
    except Exception:
        messages.info(request, 'check your input, an error occurred!!')
        return render(request, 'bulkfilehash.html')
Code example #5
def GroupByParallelProcess(tweetsDF, cores, groupMethod):
    """
    Group by and aggregate on time via a parallel process
    """

    tweetsDF.label_date = tweetsDF.label_date.astype(int)
    tweetsDF = tweetsDF.set_index("label_date")
    # Parallelizing using Pool.apply()
    df_split = GetListOfSplitDFs(tweetsDF, cores)
    # create the multiprocessing pool
    pool = Pool(cores)
    # process the DataFrame by mapping function to each df across the pool
    logging.info("Starting the grouping and aggregating process.")
    if groupMethod == "weighted-average":
        df_out = pool.map(PerformGroupbyAndAggregate, df_split)
    elif groupMethod == "sum":
        df_out = pool.map(PerformSum, df_split)
    elif groupMethod == "mean":
        df_out = pool.map(PerformMean, df_split)
    else:
        logging.error("Choose correct group by method.")
        return None

    # close down the pool and join
    pool.close()
    pool.join()
    pool.clear()

    logging.info("Ended the grouping and aggregating process.")

    return df_out
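GetListOfSplitDFs is not shown in this excerpt; a hypothetical version of such a splitter (an assumption, using numpy.array_split) could look like:

import numpy as np

def GetListOfSplitDFs(tweetsDF, cores):
    # split the indexed DataFrame into roughly equal row chunks, one per core
    return np.array_split(tweetsDF, cores)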
Code example #6
def BulkHash(request):
    if "GET" == request.method:
        return render(request, 'bulkfilehash.html', {'button': 0})
    else:
        excel_file = request.FILES["excel_file"]
        if not excel_file:
            messages.info(request, 'Please select a file!!')
            return render(request, 'bulkfilehash.html')
        wb = openpyxl.load_workbook(excel_file)
        worksheet = wb["Sheet1"]
        excel_data = list()
        ref = create_ref_code()
        row_data = list()
        ref_data = list()
        for row in worksheet.iter_rows():
            for cell in row:
                row_data.append(str(cell.value))
                ref_data.append(ref)
        p = Pool(20)
        p.map(checkhash, row_data, ref_data)
        result_to_display = Hashes.objects.filter(reference=ref_data[0])
        context = {
            'data_ip': result_to_display,
            'reference': ref_data[0],
            'button': 1
        }

        return render(request, 'bulkfilehash.html', context)
Code example #7
File: mutli_basic_fun.py Project: yixin-09/Bea2Conda
def para_pfulp_res_mutli(in_var, f1, f2, tlimit, cpu_n):
    mp.prec = 100
    start_time = time.time()
    l_var = []
    for i in in_var:
        tmp_l = depart(i[0], i[1])
        l_var.append(tmp_l)
    l_confs = []
    for element in itertools.product(*l_var):
        l_confs.append(element)
    next_tmp_l = []
    print len(l_confs)
    print l_confs
    p = Pool(cpu_n)
    all_input_l = []
    lf1 = []
    lf2 = []
    for j in l_confs:
        var_l = []
        for k in j:
            var_l.append(sorted(np.random.uniform(k[0], k[1], 20)))
        input_l = []
        for element in itertools.product(*var_l):
            input_l.append(element)
        all_input_l.append(input_l)
    lf1.extend([f1] * len(all_input_l))
    lf2.extend([f2] * len(all_input_l))
    res = p.map(test_pulp, lf1, lf2, all_input_l, l_confs)
    next_tmp_l = res
    tmp_time = time.time() - start_time
    print tmp_time
    k = len(next_tmp_l)
    if k > 10:
        k = min(int(len(next_tmp_l) / 2), 10)
    next_tmp_l = sorted(next_tmp_l, reverse=True)[0:k]
    print next_tmp_l[0]
    next_tmp_l_2 = []
    one_time = (tlimit - tmp_time) / (k)
    print one_time
    print tlimit
    print len(next_tmp_l)
    print next_tmp_l
    time_list = []
    gen_l = []
    for i in next_tmp_l:
        tmp_gen_l = produce_interval(i[1], i[2])
        gen_l.append(tmp_gen_l)
    lf1.extend([f1] * len(gen_l))
    lf2.extend([f2] * len(gen_l))
    time_list.extend([one_time] * len(gen_l))
    print gen_l
    res = p.map(fine_search, gen_l, lf1, lf2, time_list)
    print res
    next_tmp_l_2 = res
    next_tmp_l_2 = sorted(next_tmp_l_2, reverse=True)
    print next_tmp_l_2
    end_time = time.time() - start_time
    if len(next_tmp_l_2) == 0:
        return [[0.0, 0.0, [0.0, 0.0]], [0.0, 0.0, [0.0, 0.0]]], end_time
    return next_tmp_l_2[0], end_time
Code example #8
File: make_data_tfrecord.py Project: timewait/cikm
def loop_files_in_dir(input_dir, output_dir, loop_fn):
    def replace_postfix(x):
        if x.endswith('.txt'):
            x = x[:-4]
        elif x.endswith('.gz'):
            x = x[:-3]
        else:
            pass
        x = x + '.tfrecord'
        return x

    in_filenames = os.listdir(input_dir)
    out_filenames = map(replace_postfix, in_filenames)
    input_paths = map(lambda x: os.path.join(input_dir, x), in_filenames)
    output_paths = map(lambda x: os.path.join(output_dir, x), out_filenames)
    paths = list(zip(input_paths, output_paths))
    N = min(10, len(paths))

    global is_debug
    if is_debug is True:
        for in_path, out_path in paths:
            loop_fn((in_path, out_path))
    else:
        pool = Pool(N)
        pool.map(loop_fn, paths)
Code example #9
File: extract_reads.py Project: daaaaande/FUCHS
    def run(self):

        tempfile.tempdir = self.tmp_folder  # set global tmp dir

        circle_info, circle_reads = self.read_circles(self.circles)
        print('DONE reading circles, found %s circles' % (len(circle_info)))
        reads = self.load_alignment(self.bamfile, circle_reads,
                                    self.mapq_cutoff)
        print('DONE extracting circular reads')
        folders = os.listdir(self.outfolder)
        if not self.sample in folders:
            os.mkdir('%s/%s' % (self.outfolder, self.sample))
        self.write_circle_bam(reads, circle_info, self.cutoff, self.bamfile,
                              '%s/%s' % (self.outfolder, self.sample))
        print('DONE writing circle bam files\n')
        # files = os.listdir('%s/%s' % (self.outfolder, self.sample))
        import glob

        files = glob.glob('%s/%s/*.bam' % (self.outfolder, self.sample))

        # possible sorted files from previous run
        sorted_bams = glob.glob('%s/%s/*.sorted.bam' %
                                (self.outfolder, self.sample))

        # fix the file / circle count
        actual_bams = len(files) - len(sorted_bams)

        print(
            '%s circles passed your thresholds of at least %s reads with at least a mapq of %s\n\n'
            % (actual_bams, self.cutoff, self.mapq_cutoff))

        from pathos.multiprocessing import ProcessingPool as Pool

        pool = Pool(self.cpus)
        pool.map(run_parallel, files)
Code example #10
    def performStage3SubsetsSegmentation(self, subsetImgsMaskedDIR,
                                         subsetSegsDIR, tmpDIR, subImgBaseName,
                                         segStatsInfo, minPxlsVal,
                                         distThresVal, bandsVal, ncpus):
        imgTiles = glob.glob(
            os.path.join(subsetImgsMaskedDIR, subImgBaseName + "*_masked.kea"))

        def stage3threadedTiledImgSeg(imgTile):
            baseName = os.path.splitext(os.path.basename(imgTile))[0]
            clumpsFile = os.path.join(subsetSegsDIR, baseName + '_segs.kea')
            kMeansCentres, imgStretchStats = self.findSegStatsFiles(
                imgTile, segStatsInfo)
            segutils.runShepherdSegmentationPreCalcdStats(
                imgTile,
                clumpsFile,
                kMeansCentres,
                imgStretchStats,
                outputMeanImg=None,
                tmpath=os.path.join(tmpDIR, baseName + '_segstemp'),
                gdalformat='KEA',
                noStats=False,
                noStretch=False,
                noDelete=False,
                minPxls=minPxlsVal,
                distThres=distThresVal,
                bands=bandsVal,
                processInMem=False)

        p = Pool(ncpus)
        p.map(stage3threadedTiledImgSeg, imgTiles)
Code example #11
    def run(self):

        # initializing the result table file
        self.exon_count_file = '%s/%s.exon_counts.txt' % (self.inputfolder, self.sample)
        exon_counts_out = open(self.exon_count_file, 'w')
        exon_counts_out.write('sample\tcircle_id\ttranscript_id\tother_ids\texon_id\tchr\tstart'
                              '\tend\tstrand\texon_length\tunique_reads\tfragments\tnumber+\tnumber-\n')
        exon_counts_out.close()

        output_file = open('%s/%s.exon_counts.bed' % (self.inputfolder, self.sample), 'w')
        output_file.write('# BED12\n')
        output_file.close()

        # all circle files in a given folder
        files = os.listdir('%s/%s' % (self.inputfolder, self.sample))

        # create folder for coverage profiles
        folders = os.listdir(self.inputfolder)
        if not '%s.coverage_profiles' % (self.sample) in folders:
            os.mkdir('%s/%s.coverage_profiles' % (self.inputfolder, self.sample))

        from pathos.multiprocessing import ProcessingPool as Pool

        pool = Pool(self.cpus)
        pool.map(self.run_parallel, files)
Code example #12
File: diffind-wf.py Project: BiobankLab/DIFFIND
    def run(self):
        cmd_list = []
        for f in self.param['files']:
            if self.param['nucleotide'] == True:
                cmd_list.append([
                    'cdhit-est-2d', '-i',
                    str(self.param['ref_cleared']), '-i2',
                    str(f), '-c',
                    str(self.param['c']), '-g',
                    str(self.param['g']), '-s2',
                    str(self.param['s2']), '-o', self.param['odir'] + '/' +
                    os.path.splitext(os.path.basename(f))[0]
                ])
            else:
                cmd_list.append([
                    'cdhit-2d', '-i',
                    str(self.param['ref_cleared']), '-i2',
                    str(f), '-c',
                    str(self.param['c']), '-g',
                    str(self.param['g']), '-s2',
                    str(self.param['s2']), '-o', self.param['odir'] + '/' +
                    os.path.splitext(os.path.basename(f))[0]
                ])
        #print cmd_list
        with Pool(int(self.param['threads'])) as p:
            p.map(cdhit_analisys.exec_cdhit, cmd_list)
Code example #13
def main():
    dfref = utrdf.merge(right=mRNAdf, on='#transcript')
    df = find_cds_seq(dfref)
    dfl = [df]*len(codonList)  ### replicate the dataframe as a list matching the length of the input codon list
    # find_codon_positions(df, codonList)
    ### split the codon list into two sets to ease the computational burden
    p = Pool(nodes=int(args.threadNumb))
    p.map(find_codon_positions_multi, codonList, dfl)
Code example #14
File: minnie.py Project: JoaoRodrigues/minnie
def findbonds(self):
    """Calculates interactions between and/or within monomers"""
    if self.help:
        print(
            "Calculates interactions between and/or within monomers\n"
            f'\n\033[1mUsage: minnie findbonds \n'
            f'                        -cn, --complexName     <string>     \n '
            f'                                               Project ID of your complex\n\n'
            f'                        -p, --pdbs             [<.pdb>/<path>] (singleframe.pdb)   \n'
            f'                                               Give single *.pdb or give folder path \n\n'
            f'                        -i                     [<hbonds>/<ionic>/<hydrophobic>/<ring_stacking>/<all>] (hbonds)    \n'
            f'                                               Calculates which types of interactions \n\n'
            f'                        -d                      <float> (2.5)                 \n'
            f'                                               Cut-off to define a hydrogen bond\n\n'
            f'                        -intra, --includeIntra [<"True">/<"False">] ("False")  \n'
            f'                                               What do you want to analyze, all or only inter-monomer contacts? \033[0m \n\n\n\n'
            f'\n\033[1mUsage example:\033[0m\n\n'
            " Single frame    - minnie findbonds -cn sox4 -p sox4/02_frames/md_0.pdb -i hbonds  -s False  \n"
            " Multiple frames - minnie findbonds -cn sox4 -p sox4/02_frames/* -i hbonds \n"
            " Multiple frames - minnie findbonds -cn sox4 -p sox4/02_frames/* -i all \n"
        )
    elif not self.pdbs:
        print(f'where is pdb??')
    elif not self.complexName:
        print(f'Please specify complex name(s)')

    elif (self.systematic) == "True":
        pdb_list = self.pdbs
        if (self.intType == "all"):
            for intType in ["hbonds", "ionic", "hydrophobic", "ring_stacking"]:
                pool = Pool(pathos.multiprocessing.cpu_count() - 2)
                pool.map(analysis.comb_int, pdb_list,
                         len(pdb_list) * [str(self.complexName)],
                         len(pdb_list) * [str(intType)],
                         len(pdb_list) * [str(self.includeIntra)],
                         len(pdb_list) * [str(self.hbond_distance)])
                #pool.close()

        else:
            pool = pathos.multiprocessing.ProcessingPool(
                pathos.multiprocessing.cpu_count() - 2)
            pool.map(analysis.comb_int, pdb_list,
                     len(pdb_list) * [str(self.complexName)],
                     len(pdb_list) * [str(self.intType)],
                     len(pdb_list) * [str(self.includeIntra)],
                     len(pdb_list) * [str(self.hbond_distance)])
            pool.close()
        analysis.combine_interfacea_results(self.complexName)
    elif (self.systematic) == "False":
        if (self.intType == "all"):
            for intType in ["hbonds", "ionic", "hydrophobic", "ring_stacking"]:
                analysis.comb_int(self.pdbs[0], self.complexName, intType,
                                  self.includeIntra, self.hbond_distance)
        else:
            analysis.comb_int(self.pdbs[0], self.complexName, self.intType,
                              self.includeIntra, self.hbond_distance)

        analysis.combine_interfacea_results(self.complexName)
Code example #15
def main():

    print "starting fastq-dump"

    p = Pool(nodes=40)
    p.map(get_FASTQ_RP, srrList_RP)
    p.map(get_FASTQ_RNA, srrList_RNA)
    rename_RP_fastq()
    rename_RNA_fastq()
Code example #16
def main():
    print("\n\n正在下载%s的全文...\n" % KEYWORDS)

    # create the output folder if it does not already exist
    if not os.path.exists(KEYWORDS):
        os.mkdir(KEYWORDS)

    # clear the notFound file
    file_path_notFound = '{0}/{1}'.format(KEYWORDS, KEYWORDS + '_notFound')
    open(file_path_notFound, 'w', encoding='utf-8').close()  # truncate the file

    # open the info file, extract uid, doi, pmcid and title, and keep each as a list to use as the parallel map arguments
    file_path_info = '{0}/{1}'.format(KEYWORDS, KEYWORDS + '_info')

    with open(file_path_info, 'r', encoding='utf-8') as fh:
        line_number = 0
        uid = []
        doi = []
        pmcid = []
        title = []
        for line in fh:
            line_number += 1
            list_line = line.strip().split('\t')
            if line_number % 4 == 1:
                title0 = ''
                uid.append(list_line[0])
                doi.append(list_line[3])
                pmcid.append(list_line[4])
                title1 = str(list_line[1]) + '-'
            if line_number % 4 == 2:
                title2 = re.sub(PATTERN_title, ' ', str(list_line[0])[7:])
                title0 = title1 + title2
                title.append(title0)
                title0 = ''

    # count the files in the folder before downloading
    file_count_before = len([
        name for name in os.listdir(KEYWORDS)
        if os.path.isfile(os.path.join(KEYWORDS, name))
    ])

    # run the down_paper_from_PMC_and_SciHub tasks in parallel
    pool = Pool(4)
    pool.map(down_paper_from_PMC_and_SciHub, uid, doi, pmcid, title)

    # count the files in the folder after downloading
    file_count_after = len([
        name for name in os.listdir(KEYWORDS)
        if os.path.isfile(os.path.join(KEYWORDS, name))
    ])

    # print the run summary
    print("Finished downloading full texts! This run downloaded %d papers." % (file_count_after - file_count_before))
    print("The downloaded full texts are saved in the %s folder." % KEYWORDS)
    print("Papers that could not be downloaded are listed in %s_notFound.txt.\n" % KEYWORDS)
Code example #17
File: encode_processor.py Project: saketkc/moca_web
    def run_all_control_analysis(self):
        dirs = dir_walker(self.encode_root)
        control_dir = None
        for d in dirs:
            if 'control' in d.lower():
                control_dir = d
        assert control_dir is not None
        replicates = dir_walker(control_dir, level=1)
        pool = ProcessingPool(nodes=14)
        pool.map(self.control_analysis, tuple(replicates))
        return replicates
Code example #18
File: utils.py Project: bcgsc/straglr
def parallel_process(func, args, nprocs, bam=None, fasta=None):
    p = Pool(nprocs)
    if bam is not None and fasta is not None:
        results = p.map(func, args, [bam] * len(args), [fasta] * len(args))
    elif bam is not None and fasta is None:
        results = p.map(func, args, [bam] * len(args))
    elif bam is None and fasta is not None:
        results = p.map(func, args, [fasta] * len(args))
    else:
        results = p.map(func, args)
    return results
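A small illustrative call for parallel_process above; count_reads and the BAM path are hypothetical stand-ins rather than part of straglr:

def count_reads(locus, bam):
    # a real worker would open the BAM (e.g. with pysam) and count reads overlapping locus
    return locus, bam

loci = ['chr1:100-200', 'chr2:300-400']
results = parallel_process(count_reads, loci, nprocs=2, bam='sample.bam')
# results == [('chr1:100-200', 'sample.bam'), ('chr2:300-400', 'sample.bam')]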
Code example #20
    def execute_all_parallel(self,
                             n_threads=multiprocessing.cpu_count(),
                             checkpoint_size=None,
                             prepend_existing=True):
        pool = Pool(n_threads)

        # wrapper because of map function's lack of multi argument support

        def func_star(fnid_args_kwargs):
            print "Executing function..."
            t0 = time.time()
            fn_output = self.fn[0](*(fnid_args_kwargs[1]),
                                   **(fnid_args_kwargs[2]))
            t1 = time.time()
            print "...completed (Time elapsed: ", str(t1 - t0), ")."
            return fn_output

        self.speak("Starting parallel scheduled execution (", str(n_threads),
                   " threads)...")
        if checkpoint_size is not None:
            t0 = time.time()
            checkpoints = range(0, len(self.fn_calls), checkpoint_size)
            for chkid, chk in enumerate(checkpoints):
                fn_call_ids = range(min(checkpoint_size, len(self.fn_calls)))
                self.speak("Starting scheduled batch " + str(chkid + 1) +
                           " of " + str(len(checkpoints)) + "...")
                t2 = time.time()
                fn_outputs = pool.map(
                    func_star,
                    self.fn_calls[min(fn_call_ids):max(fn_call_ids) + 1])
                t3 = time.time()
                self.speak("Scheduled batch complete. Time elapsed: ",
                           str(t3 - t2))
                self.speak("-----------------------")
                self.save_results(fn_call_ids, fn_outputs, prepend_existing)
                del self.fn_calls[min(fn_call_ids):max(fn_call_ids) + 1]
                if self.savefile_path is not None:
                    self.save_schedule()
            if self.savefile_path is not None:
                os.remove(
                    self.savefile_path)  # delete (now empty) snapshot file
            t1 = time.time()

        else:
            t0 = time.time()
            fn_outputs = pool.map(func_star, self.fn_calls)
            t1 = time.time()
            self.save_all_results(fn_outputs, prepend_existing)
        self.speak("Multi-schedule complete. Total time elapsed: ",
                   str(t1 - t0))

        return fn_outputs
Code example #21
def main():

    context = os.path.join(os.getcwd(), outdir)

    if not os.path.exists(context):
        os.system('mkdir ' + context)
    hits_recs = list(SeqIO.parse(os.path.join(os.getcwd(), infile), 'fasta'))
    hits_ids = [rec.id for rec in hits_recs]

    p = Pool(threads)
    p.map(lambda hit_id: get_context(hit_id, context, hits_ids), hits_ids)

    print('boogie')
Code example #22
File: initsync_pipe.py Project: iagcl/data_pipeline
def parallelise_initsync(argv, ssp_params, process_control_id, logger):
    # Pivot the collection of source_system_profile records into
    # three separate lists to enable us to call pool.map on each record
    (source_schemas, tables, target_schemas,
     query_conditions) = map(list, zip(*ssp_params))

    source_conn_detail = dbuser.get_dbuser_properties(argv.sourceuser)
    target_conn_detail = dbuser.get_dbuser_properties(argv.targetuser)

    logger.info("Processing tables with {} dedicated worker processes".format(
        argv.numprocesses))
    pool = Pool(nodes=argv.numprocesses)

    argvs = [argv] * len(tables)
    source_conn_details = [source_conn_detail] * len(tables)
    target_conn_details = [target_conn_detail] * len(tables)
    pcids = [process_control_id] * len(tables)
    queues = [manager.Queue()] * len(tables)

    logger.debug("Starting a new process for each table in: {tables}".format(
        tables=tables))
    # Execute initsync for each schema/table combination in parallel
    pool.map(initsync_table,
             argvs,
             source_conn_details,
             target_conn_details,
             source_schemas,
             tables,
             target_schemas,
             pcids,
             query_conditions,
             queues,
             chunksize=1)  # Ensure tables are processed in sequence
    # and workers are fully utilised

    pool.close()
    logger.debug("parallelise_initsync: Pool joining")
    pool.join()
    logger.debug("parallelise_initsync: Pool joined")

    all_table_results = {}
    for q in queues:
        size = q.qsize()
        message = q.get()
        logger.debug("Message queue size = {s}, message = {m}".format(
            s=size, m=message))
        all_table_results.update(message)

    logger.debug("all_table_results = {r}".format(r=all_table_results))
    return all_table_results
Code example #23
    def _marginal_acq_parallel(self, X):
        """
        """
        marginal_acqX = np.zeros(
            (X.shape[0], len(self.utility_parameter_samples)))
        n_w = self.W_samples.shape[0]
        pool = Pool(4)
        for h in range(self.number_of_gp_hyps_samples):
            self.model.set_hyperparameters(h)
            marginal_acqX += np.atleast_2d(
                pool.map(self._parallel_acq_helper, X))

        marginal_acqX /= (self.number_of_gp_hyps_samples * n_w)
        return marginal_acqX
Code example #24
def create_features(WRITE_DB, FP, all_tables, schemas, CPUS, selected_schema, selected_table):
 
    #define key dataframe
    
    key_df = get_key(all_tables[selected_table], schemas,selected_schema)
    key_df = key_df.sort_values(by = ['key','date'])
    
    if selected_schema == 'scoring_schema':
        key_df = key_df[['key']]
    else:
        key_df = key_df[['key','date','target']]
    
    pool = Pool(CPUS)
    
    #Features output framework
    all_functions = inspect.getmembers(FP, inspect.isfunction)
    all_functions = [x[1] for x in all_functions]
#     print(all_functions)
    
    args = (WRITE_DB,key_df, schemas, all_tables, fc_protocol,selected_schema)
    all_functions = [(x,args) for x in all_functions]
    
    
    if WRITE_DB:        
       
        temp = pool.map(trig_func, all_functions)
        df = pd.concat(temp, axis = 1)
        df = pd.concat([key_df,df], axis = 1)
        
        engine = conn_eng()
        if selected_schema == 'scoring_schema':
            df.to_sql(all_tables['scoring_table'],schema= schemas['output_schema'],
                      con=engine, index=False,if_exists ='replace')
           
        else:   
            df.to_sql(all_tables['features_table'],schema= schemas['output_schema'],
                      con=engine, index=False,if_exists ='replace')
            
        engine.dispose()
        del engine
        #print_summary(MISSING_VALUE_TREATMENT,df)       
    else:              
        temp = pool.map(trig_func, all_functions)
        df = pd.concat(temp, axis = 1)
        df = pd.concat([key_df,df], axis = 1)
#         print_summary(MISSING_VALUE_TREATMENT,df)
        return df
    return 'Files written to DB'
Code example #25
def makeRadial():
    rad, angle = d["radial"]["rad"], d["radial"]["angle"]
    args = np.linspace(angle, angle + np.pi, frameCount)

    pool = Pool(4)

    while True:
        subIm = JuliaTools.subImage(c=rad * np.exp(1j * angle),
                                    r=r,
                                    n=10,
                                    p=p,
                                    iters=iters,
                                    split=split,
                                    save=False,
                                    aura=False)
        isBlackList = pool.map(subIm, coords)
        if not all(isBlackList):
            break
        else:
            rad *= 0.975

    # Circular arc c follows in complex plane
    cPath = rad * np.exp(1j * args)

    for frame in xrange(frameCount):
        subIm = JuliaTools.subImage(c=cPath[frame],
                                    r=r,
                                    n=n,
                                    p=p,
                                    iters=iters,
                                    split=split)
        isBlackList = pool.map(subIm, coords)
        allBlack = all(isBlackList)

        if not allBlack:
            JuliaTools.makeFrame(frame, n, split, coords)

    pool.close()

    JuliaTools.prepareForFFmpeg(frameCount=frameCount, loop=True)

    with open("tweet.txt", "w") as out:
        out.write("Images generated using constants"
                  " on a circular arc of radius {:03.2f}.".format(rad))

    stop = timeit.default_timer()

    print stop - start
Code example #26
        def decorator(data, par):
            if (type(data) != type(np.zeros(3))) and (type(data) != type(
                    pd.DataFrame([0]))) and (type(data) != type([])):
                raise TypeError(
                    'data type must be a list, a numpy array or a pandas DataFrame!'
                )

            if self.n_proc == 1:
                print(' processing data in single-core mode')
                result = func(data, par)
            else:
                print(' processing data in %d-core mode' % self.n_proc)
                pool = Pool()
                data = self._multiproc_data_split(data)
                result = pool.map(func, data, repeat(par))
                if type(result[0]) == type(np.array([0])):
                    result = np.concatenate(result, axis=0)
                elif type(result[0]) == type(pd.DataFrame([0])):
                    result = pd.concat(result, axis=0)
                elif type(result) == type([]):
                    res = []
                    [res.extend(tmp) for tmp in result]
                    result = res
                else:
                    print(
                        'mprows: output data structure of the given ' +
                        'function is not recognized. Return a list containing results of each process !'
                    )
            return result
Code example #27
def calculate(population, maps_generator_from_sample):
    logging.info("Condi_evo: calculating population:")
    logging.info(population)
    pool = Pool(EC['POOL_SIZE'])
    for it in range(EC['NO_OF_ITERATIONS']):
        print(it)
        logging.info("Condi_evo: calculating iteration " + str(it))
        iter_start = time.time()

        population = pool.map(lambda s: sample_acceptance_score(
            s, maps_generator_from_sample(s)),
                              population)  # EVALUATING POPULATION
        # for sample in population:
        #     sample[3]+=1
        eval_fin = time.time()
        logging.info("TIME: Evaluating population finished in " +
                     str(eval_fin - iter_start))

        population = evolve_condi_population(population)  # EVOLVING POPULATION

        evol_fin = time.time()
        logging.info("TIME: Evolving population finished in " +
                     str(evol_fin - eval_fin))
        logging.info("TIME: Iteration finished in " +
                     str(evol_fin - iter_start))

        with open(STORAGE_PREFIX + ".population", 'w') as f:
            f.write(json.dumps(population))

        if it % 10 == 9:
            display_n(population, 3)
    return population
Code example #28
    def image_division(self):
        image_rows, image_cols = self.__image.shape[:2]
        print self.__image.shape[:2]
        grid_indices = [
            np.array([x, y])
            for x in xrange(0, image_cols - self.__GRID_SIZE, self.__GRID_SIZE)
            for y in xrange(0, image_rows - self.__GRID_SIZE, self.__GRID_SIZE)
        ]
        pool = Pool()
        output = pool.map(self.grid_division, grid_indices)
        threshod_sucess_sample = 6
        ransacGrouper = RansacLine(1, threshod_sucess_sample, 25, 2)
        for index, edgels in enumerate(output):
            if len(edgels) > threshod_sucess_sample:
                ransacGrouper.edgels = edgels
                ransac_groups = ransacGrouper.applay_parallel_ransac()
                self.line_segment(ransac_groups)

        # print len(self.__lines)
        # for line in self.__lines:
        #     print (line.slope, line.intercept)
        #     coefficients = np.array([line.slope, line.intercept])
        #     # print "cof: ", coefficients
        #     x = np.array([20, 50], dtype=np.int32)
        #     polynomial = np.poly1d(coefficients)
        #     # print "Poly: ", polynomial
        #     y = polynomial(x)
        #     y = [int(e) for e in y]
        #     print "x: ", x, "y: ", y
        #     cv2.line(self.__image, (x[0], y[0]), (x[1], y[1]), (0, 255, 0), 1)

        cv2.imshow("image", self.__image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
Code example #29
File: Fragments.py Project: nick-youngblut/SIPSim
def main(args):
    """
    Parameters
    ----------
    args : dict
        See ``fragments`` subcommand
    """
    # list of genome files
    genomeList =  Utils.parseGenomeList(args['<genomeList>'], 
                                        filePath=args['--fp'])
        
    # analyzing each genome (in parallel)    
    pfunc = functools.partial(by_genome, args=args)
    
    # diffusion calc in parallel
    pool = ProcessingPool(nodes=int(args['--np']))
    if args['--debug']:
        fragList = map(pfunc, genomeList)
    else:
        fragList = pool.map(pfunc, genomeList)

    # writing out table
    if args['--tbl']:
        write_fragList(fragList)
    else:
        dill.dump(fragList, sys.stdout)
Code example #30
    def map(self, f, seq):
        """
        Parallel implementation of map.

        Parameters
        ----------
        f : callable
            A function to map to all the values in 'seq'

        seq : iterable
            An iterable of values to process with 'f'

        Returns
        -------
        results : list, shape=[len(seq)]
            The evaluated values
        """
        if self.n_jobs < 1:
            n_jobs = multiprocessing.cpu_count()
        elif self.n_jobs == 1:
            return list(map(f, seq))
        else:
            n_jobs = self.n_jobs

        pool = Pool(n_jobs)
        results = list(pool.map(f, seq))
        # Closing/joining is not really allowed because pathos sees pools as
        # lasting for the duration of the program.
        return results
Code example #31
    def optimize(self, f=None, df=None, f_df=None, duplicate_manager=None, x_baseline=None):
        """
        Optimizes the input function.

        :param f: function to optimize.
        :param df: gradient of the function to optimize.
        :param f_df: returns both the function to optimize and its gradient.

        """
        self.f = f
        self.df = df
        self.f_df = f_df
        

        ## --- Update the optimizer, in case context has been passed.
        self.optimizer = choose_optimizer(self.optimizer_name, self.context_manager.noncontext_bounds)

        ## --- Selecting the anchor points and removing duplicates
        if self.type_anchor_points_logic == max_objective_anchor_points_logic:
            anchor_points_generator = ObjectiveAnchorPointsGenerator(self.space, random_design_type, f, self.n_starting)
        elif self.type_anchor_points_logic == thompson_sampling_anchor_points_logic:
            anchor_points_generator = ThompsonSamplingAnchorPointsGenerator(self.space, sobol_design_type, self.model)
           
        ## -- Select the anchor points (with context)
        anchor_points, anchor_points_values = anchor_points_generator.get(num_anchor=self.n_anchor, duplicate_manager=duplicate_manager, context_manager=self.context_manager, get_scores=True)

        if x_baseline is not None:
            f_baseline = f(x_baseline)[:, 0]
            anchor_points = np.vstack((anchor_points, x_baseline))
            anchor_points_values = np.concatenate((anchor_points_values, f_baseline))
        #print(anchor_points.shape)
        #print(anchor_points_values.shape)
        print('anchor points')
        print(anchor_points)
        print(anchor_points_values)
        parallel = True
        if parallel:
            pool = Pool(4)
            optimized_points = pool.map(self._parallel_optimization_wrapper, anchor_points)
        else:
            #pass
            optimized_points = [apply_optimizer(self.optimizer, a, f=f, df=None, f_df=f_df, duplicate_manager=duplicate_manager, context_manager=self.context_manager, space = self.space) for a in anchor_points]                 
        
        print('optimized points')
        print(optimized_points)            
        x_min, fx_min = min(optimized_points, key=lambda t:t[1])
        if x_baseline is not None:
            for i in range(x_baseline.shape[0]):
                val = f_baseline[i]
                if val < fx_min:
                    print('baseline was best found')
                    print(val)
                    x_min = np.atleast_2d(x_baseline[i, :])
                    fx_min = val
        #if np.asscalar(anchor_points_values[0]) < np.asscalar(fx_min):
            #print('anchor_point was best found')
            #fx_min = np.atleast_2d(anchor_points_values[0])
            #x_min = np.atleast_2d(anchor_points[0])

        return x_min, fx_min
Code example #32
File: base.py Project: crcollins/molml
    def map(self, f, seq):
        """
        Parallel implementation of map.

        Parameters
        ----------
        f : callable
            A function to map to all the values in 'seq'

        seq : iterable
            An iterable of values to process with 'f'

        Returns
        -------
        results : list, shape=[len(seq)]
            The evaluated values
        """
        if self.n_jobs < 1:
            n_jobs = multiprocessing.cpu_count()
        elif self.n_jobs == 1:
            return list(map(f, seq))
        else:
            n_jobs = self.n_jobs

        pool = Pool(n_jobs)
        results = list(pool.map(f, seq))
        # Closing/joining is not really allowed because pathos sees pools as
        # lasting for the duration of the program.
        return results
Code example #33
File: nputil.py Project: QhelDIV/xgutils
def parallelMap(func, args, batchFunc=None, zippedIn=True, zippedOut=False, cores=-1, quiet=False):
    """Parallel map using multiprocessing library Pathos

    Args:
        func (function): the function to map over the arguments
        args (arguments): [arg1s, arg2s ,..., argns](zippedIn==True) or [[arg1,arg2,...,argn], ...](zippedIn=False)
        batchFunc (func, optional): TODO. Defaults to None.
        zippedIn (bool, optional): See [args]. Defaults to True.
        zippedOut (bool, optional): See [Returns]. Defaults to False.
        cores (int, optional): How many processes. Defaults to -1.
        quiet (bool, optional): if True, do not print anything. Defaults to False.

    Returns:
        tuples: [out1s, out2s,..., outns](zippedOut==False) or [[out1,out2,...,outn], ...](zippedOut==True)
    """
    from pathos.multiprocessing import ProcessingPool
    if batchFunc is None:
        batchFunc = lambda x:x
    if zippedIn==True:
        args = list(map(list, zip(*args))) # transpose
    if cores==-1:
        cores = os.cpu_count()
    pool = ProcessingPool(nodes=cores)
    batchIdx = list(range(len(args[0])))
    batches = array2batches(batchIdx, cores)
    out = []
    iterations = enumerate(batches) if quiet==True else progbar(enumerate(batches))
    for i,batch in iterations:
        batch_args = [[arg[i] for i in batch] for arg in args]
        out.extend( pool.map(func, *batch_args) )
    if zippedOut == False:
        if type(out[0]) is not tuple:
            out=[(item,) for item in out]
        out = list(map(list, zip(*out)))
    return out
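For illustration, a toy call to parallelMap above (the add worker is not part of xgutils): with zippedIn=True the arguments are supplied as one tuple per call, and with zippedOut=False the results come back grouped by output position.

def add(a, b):
    return a + b

pairs = [(1, 10), (2, 20), (3, 30)]
sums, = parallelMap(add, pairs, zippedIn=True, zippedOut=False, cores=2, quiet=True)
# sums == [11, 22, 33]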
Code example #34
File: featurize.py Project: arose/deepchem
  def _featurize_complexes(self, df, featurizer, parallel=True,
                           worker_pool=None):
    """Generates circular fingerprints for dataset."""
    protein_pdbs = list(df["protein_pdb"])
    ligand_pdbs = list(df["ligand_pdb"])
    complexes = zip(ligand_pdbs, protein_pdbs)

    def featurize_wrapper(ligand_protein_pdb_tuple):
      ligand_pdb, protein_pdb = ligand_protein_pdb_tuple
      print("Featurizing %s" % ligand_pdb[0:2])
      molecule_features = featurizer.featurize_complexes([ligand_pdb], [protein_pdb])
      return molecule_features

    if worker_pool is None:
      features = []
      for ligand_protein_pdb_tuple in zip(ligand_pdbs, protein_pdbs):
        features.append(featurize_wrapper(ligand_protein_pdb_tuple))
    else:
      # a worker_pool was supplied, so hand the work to its synchronous map
      features = worker_pool.map_sync(featurize_wrapper,
                                      zip(ligand_pdbs, protein_pdbs))
      #features = featurize_wrapper(zip(ligand_pdbs, protein_pdbs))
    df[featurizer.__class__.__name__] = list(features)
Code example #35
def run(non_iter_args, do_multiprocessing):
    [
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    ] = non_iter_args

    partial_gaincalc_oneset = partial(
        calc_weights_oneset,
        weightcalcdata,
        weightcalculator,
        box,
        startindex,
        size,
        newconnectionmatrix,
        method,
        boxindex,
        filename,
        headerline,
        writeoutput,
    )

    if do_multiprocessing:
        pool = Pool(processes=pathos.multiprocessing.cpu_count())
        pool.map(partial_gaincalc_oneset, weightcalcdata.causevarindexes)

        # Current solution to no close and join methods on ProcessingPool
        # https://github.com/uqfoundation/pathos/issues/46

        s = pathos.multiprocessing.__STATE["pool"]
        s.close()
        s.join()
        pathos.multiprocessing.__STATE["pool"] = None

    else:
        for causevarindex in weightcalcdata.causevarindexes:
            partial_gaincalc_oneset(causevarindex)

    return None
Code example #36
    def compute_importance(self, alpha):
        """

        """
        pool = ProcessingPool(self._numJobs)
        errors = pool.map(self._computeImportanceOfTree,
                          [alpha] * self._numTree, range(self._numTree))
        return np.array(errors).mean(axis=0)
Code example #37
File: utils.py Project: FayolChang/mlp
    def apply(values):
        pool = Pool()
        # result = []
        result = pool.map(func, values)
        # result.append(ret)
        # pool.close()
        # pool.join()
        return result
Code example #38
    def alignAllShapes( self ):
        import pathos.multiprocessing as mp
        start = time.time()
        pool = Pool()
        self.allShapes = pool.map( self.alignOneShape, self.allShapes )
#        for sh in self.allShapes:
#          self.alignOneShape( sh )
        print 'alignAllShapes: %f' % (time.time() - start  )
        return 
Code example #39
File: opflowreg.py Project: chrinide/image-funcut
def register_stack_to_template(frames, template, regfn, njobs=4, **fnargs):
    """
    Given stack of frames (or a FSeq obj) and a template image, 
    align every frame to template and return a list of functions,
    which take an image and return warped image, aligned to template.
    """
    if njobs > 1:
        pool = ProcessingPool(nodes=njobs) 
        out = pool.map(partial(regfn, template=template, **fnargs), frames)
    else:
        out = np.array([regfn(img, template, **fnargs) for img in frames])
    return out
Code example #40
File: utils.py Project: hoidn/packages
def parallelmap(func, data, nodes = None):
    """
    Return the averaged signal and background (based on blank frames) over the given runs
    """
    if not nodes:
        nodes = multiprocessing.cpu_count() - 2
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, data)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
Code example #41
def main():
	from hyperopt import fmin,tpe,hp,Trials
	from hyperopt.mongoexp import MongoTrials
	import os 

	fit_params=eval(open('fit_parameters.txt').read())
	fit_params['root']=os.getcwd()
	directory=init_directory(fit_params)
	if fit_params['optimization']=='hyperopt':
		space=search_space(fit_params)
		trials=Trials()
		best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
		plot_results(trials.trials)

	#https://github.com/hyperopt/hyperopt/wiki/Parallelizing-Evaluations-During-Search-via-MongoDB
	''' commands for MongoDB
	mongod --dbpath . --port 1234
	export PYTHONPATH=$PYTHONPATH:/home/pduggins/influence_susceptibility_conformity
	hyperopt-mongo-worker --mongo=localhost:1234/foo_db --poll-interval=0.1
	'''
	if fit_params['optimization']=='mongodb':
		space=search_space(fit_params)
		space['directory']=directory
		trials=MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp4')
		best=fmin(run,space=space,algo=tpe.suggest,max_evals=fit_params['max_evals'],trials=trials)
		plot_results(trials.trials)

	if fit_params['optimization']=='evolve':
		from pathos.multiprocessing import ProcessingPool as Pool
		from pathos.helpers import freeze_support #for Windows
		import numpy as np
		import pandas as pd
		# freeze_support()
		evo_pop=init_evo_pop(fit_params)
		pool = Pool(nodes=fit_params['threads'])

		for g in range(fit_params['generations']):
			exp_params=[value['P'] for value in evo_pop.itervalues()]
			fitness_list=pool.map(run, exp_params)
			# new_gen_list=tournament_selection(fitness_list,fit_params)
			new_gen_list=rank_proportional_selection(fitness_list)
			remade_pop=remake(evo_pop,new_gen_list)
			mutated_pop=mutate(remade_pop,evo_pop,fit_params)
			evo_pop=mutated_pop
			# crossed_pop=crossover(mutated_pop)
			# evo_pop=crossed_pop
			mean_F=np.average([evo_pop[ind]['F'] for ind in evo_pop.iterkeys()])
			std_F=np.std([evo_pop[ind]['F'] for ind in evo_pop.iterkeys()])
			print '\nGeneration %s: mean_F=%s, std F=%s' %(g+1,mean_F,std_F) 

		out_pop=pd.DataFrame([evo_pop])
		out_pop.reset_index().to_json('evo_pop.json',orient='records')
Code example #42
File: opflowreg.py Project: chrinide/image-funcut
def apply_warps(warps, frames, njobs=4):
    """
    returns result of applying warps for given frames (one warp per frame)
    """
    if njobs > 1 :
        pool = ProcessingPool(nodes=njobs)
        out = np.array(pool.map(parametric_warp, frames, warps))
    else:
        out = np.array([parametric_warp(f,w) for f,w in itt.izip(frames, warps)])
    if isinstance(frames, fseq.FrameSequence):
        out = fseq.open_seq(out)
        out.meta = frames.meta
    return out
Code example #43
	def multi_ray_sim(self, sources, procs=8):
		self.minener = 1e-10 # minimum energy threshold
		self.itmax = 1000 # stop iteration after this many ray bundles were generated (i.e. 
					# after the original rays intersected some surface this many times).
		# The multiprocessing raytracing method to call from the original engine.
		if len(sources) != procs:
			raise Exception('Number of sources and processors do not agree')

		# Creates a pool of processes and makes them raytrace one different source each. The resm list returned is a list of copies of the original engine post raytrace.
		pool = Pool(processes=procs)
		resm = pool.map(self.trace, sources)

		# New tree container and length envaluation to redimension it.
		tree_len = N.zeros(len(resm), dtype=N.int)
		trees = []

		for eng in xrange(len(resm)):
			# Get and regroup results in one tree and assembly only:
			S = resm[eng]._asm.get_surfaces()
			tree_len[eng] = len(resm[eng].tree._bunds)
			trees.append(resm[eng].tree)
			# Next loop is to get the optics callable objects and copy regroup their values without asumptions about what they are.
			for s in xrange(len(S)):
				part_res = S[s]._opt.__dict__
				keys = S[s]._opt.__dict__.keys()
				for k in xrange(len(keys)):
					if (keys[k] == '_opt') or (keys[k] == '_abs'):
						continue
					if len(self._asm.get_surfaces()[s]._opt.__dict__[keys[k]]) < 1:
						self._asm.get_surfaces()[s]._opt.__dict__[keys[k]] = part_res[keys[k]]
					elif len(part_res[keys[k]]) < 1:
						continue
					else:
						self._asm.get_surfaces()[s]._opt.__dict__[keys[k]][0] = N.append(self._asm.get_surfaces()[s]._opt.__dict__[keys[k]][0], part_res[keys[k]][0], axis=1)

		# Regroup trees:
		self.tree = RayTree() # Create a new tree for all
		for t in xrange(N.amax(tree_len)): # Browse through general tree levels up to the maximum length that has been raytraced
			for eng in xrange(len(resm)): # Browse through bundles of each parallel engine.
				if t<(tree_len[eng]): # to not go over the length of the present parallel tree.
					if t==len(self.tree._bunds): # if the index is greater than the actual length of the general tree, add a new bundle to the general tree with the present parallel bundle to initialise it.
						bundt = trees[eng]._bunds[t]
					else:	
						if t>0: # adapt parents indexing prior to concatenation
							trees[eng]._bunds[t].set_parents(trees[eng]._bunds[t].get_parents()+len(self.tree._bunds[t].get_parents()))
						bundt = concatenate_rays([bundt, trees[eng]._bunds[t]])
			self.tree.append(bundt)
		
		trees = 0
Code example #44
File: utils.py Project: hoidn/utils
def parallelmap(func, lst, nodes = None):
    """
    Return the averaged signal and background (based on blank frames) over the given runs using
    multiprocessing (as opposed to MPI).
    """
    from pathos.multiprocessing import ProcessingPool
    from pathos import multiprocessing
    if not nodes:
        nodes = multiprocessing.cpu_count() - 2
    pool = ProcessingPool(nodes=nodes)
    try:
        return pool.map(func, lst)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
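A trivial invocation of parallelmap above, with a toy squaring worker (illustrative only):

squares = parallelmap(lambda x: x * x, range(16), nodes=4)
# squares == [0, 1, 4, ..., 225]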
Code example #45
def launch_simulation_parallel(simulation_config,
                               max_iterations,
                               parallel_blocks=gtconfig.parallel_blocks,
                               show_progress=True):
    """
    Parallel version of the simulation launch, to maximize CPU utilization.

    :param catalog_size: Number of defects present on the system.
    :param priority_generator: Generator for the priority of the defects.
    :param team_capacity:
    :param reporters_config:
    :param resolution_time_gen:
    :param max_iterations:
    :param max_time:
    :param dev_team_bandwidth:
    :param gatekeeper_config:
    :param inflation_factor:
    :param quota_system:
    :param parallel_blocks:
    :return:
    """
    pool = Pool(processes=parallel_blocks)
    samples_per_worker = max_iterations / parallel_blocks

    logger.info("Launching " + str(max_iterations) + " replications IN PARALLEL. Using " + str(parallel_blocks) +
                " workers with " + str(samples_per_worker) + " samples each.")

    worker_inputs = []

    for block_id in range(parallel_blocks):
        worker_input = {'simulation_config': simulation_config,
                        'max_iterations': samples_per_worker,
                        'block_id': block_id,
                        'show_progress': False}

        worker_inputs.append(worker_input)

    # Showing progress bar of first batch
    worker_inputs[0]['show_progress'] = show_progress
    worker_outputs = pool.map(launch_simulation_wrapper, worker_inputs)

    logger.info(str(max_iterations) + " replications finished. Starting output consolidation.")
    simulation_metrics = SimulationMetrics()

    for output in worker_outputs:
        simulation_metrics.append_results(output)

    return simulation_metrics
Code example #46
File: avg_bgsubtract_hdf.py Project: ggggggggg/LCLS
def get_signal_bg_many_parallel(runList, detid, **kwargs):
    """
    Return the averaged signal and background (based on blank frames) over the given runs
    """
    def mapfunc(run_number):
        return get_signal_bg_one_run(run_number, detid, **kwargs)

    MAXNODES = 14
    pool = ProcessingPool(nodes=min(MAXNODES, len(runList)))
    bg = np.zeros(DIMENSIONS_DICT[detid])
    signal = np.zeros(DIMENSIONS_DICT[detid]) 
    run_data = pool.map(mapfunc, runList)
    for signal_increment, bg_increment in run_data:
        signal += (signal_increment / len(runList))
        bg += (bg_increment / len(runList))
    return signal, bg
Code example #47
    def _calculate_s_powder_over_atoms_core(self, q_indx=None):
        """
        Helper function for _calculate_s_powder_1d.
        :returns: Python dictionary with S data
        """
        atoms_items = {}
        atoms = range(self._num_atoms)
        self._prepare_data(k_point=q_indx)

        if PATHOS_FOUND:
            p_local = ProcessingPool(nodes=AbinsModules.AbinsParameters.threads)
            result = p_local.map(self._calculate_s_powder_one_atom, atoms)
        else:
            result = [self._calculate_s_powder_one_atom(atom=atom) for atom in atoms]

        for atom in range(self._num_atoms):
            atoms_items["atom_%s" % atom] = {"s": result[atoms.index(atom)]}
            self._report_progress(msg="S for atom %s" % atom + " has been calculated.")
        return atoms_items
Code example #48
File: dataPlot.py Project: Vifespoir/mLearning
    def transpose_index(self):  # WORKS ONLY FOR TEST DATA
        """Transpose the data according to the index."""

        data = self.data
        indexes = list(set(data.index))

        names, datasets = [], []
        for name in indexes:
            names.append(name)
            datasets.append(data[[name in i for i in data.index]])

        plotSets = zip(names, datasets)

        pool = ProcessingPool()
        plots = []
        for name, dataset in plotSets:
            plots.append(pool.map(self.create_transposed_plot, [name], [dataset]))

        logging.debug('Index transposed')

        return plots
Code example #49
File: evaluator.py Project: WING-NUS/corpSearch
    def evaluate(self):
        """Evaluates the system using 10-fold cross validation, returning
        a dictionary of results keyed by classifier type."""
        trainer = Trainer(self.profiles, self.profile_type,
                          self.converter, self.network)
        training_set = trainer.generate_training_set()

        profiles = numpy.array(list(self.profiles))
        data = numpy.array(training_set.data)
        labels = numpy.array(training_set.labels)

        fold_iterator = cross_validation.StratifiedKFold(labels,
                                                         n_folds=10,
                                                         shuffle=True,
                                                         random_state=42)

        official_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                x['posts']))
                                  for x in self.profiles if x['label'] == 2)
        affiliate_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                 x['posts']))
                                   for x in self.profiles if x['label'] == 1)

        official_profiles = defaultdict(list)
        for name, profile in official_profile_pairs:
            official_profiles[name].append(profile)

        affiliate_profiles = defaultdict(list)
        for name, profile in affiliate_profile_pairs:
            affiliate_profiles[name].append(profile)

        classification_results = defaultdict(list)
        fold = 1
        for train, test in fold_iterator:
            classifiers = initialize_classifiers()

            training_data = data[train]
            training_labels = labels[train]

            test_set = itertools.compress(profiles[test], labels[test])
            company_names = set(x['name'] for x in test_set)
            print 'Test set', fold, '-', len(company_names), 'companies.'

            for classifier in classifiers:
                classifier_name = classifier['type']
                c = classifier['classifier']
                trained = c.fit(training_data, training_labels)

                system = SingleNetworkSearcher(
                    classifier=trained,
                    searchengine=self.search_engine,
                    profile_converter=self.converter,
                    network=self.network)

                number_of_workers = int(multiprocessing.cpu_count() * 0.75)
                worker_pool = ProcessingPool(number_of_workers)
                all_results = worker_pool.map(system.query, company_names)

                combined_official_results = []
                combined_affiliate_results = []
                for idx, name in enumerate(company_names):
                    official_results = official_profiles[name]
                    affiliate_results = affiliate_profiles[name]

                    results = all_results[idx]
                    classified_official = results.official
                    classified_affiliate = results.affiliate
                    classified_unrelated = results.unrelated

                    marked_official_handles = [x['profile'].handle.lower()
                                               for x in classified_official]
                    marked_affiliate_handles = [x['profile'].handle.lower()
                                                for x in classified_affiliate]
                    marked_unrelated_handles = [x['profile'].handle.lower()
                                                for x in classified_unrelated]
                    official_handles = [x.handle.lower()
                                        for x in official_results]
                    affiliate_handles = [x.handle.lower()
                                         for x in affiliate_results]

                    official_counts = MetricCalculator.count_positives(
                        actual_handles=official_handles,
                        marked_positive_handles=marked_official_handles,
                        marked_negative_handles=(marked_affiliate_handles
                                                 + marked_unrelated_handles))
                    combined_official_results.append(official_counts)

                    affiliate_counts = MetricCalculator.count_positives(
                        actual_handles=affiliate_handles,
                        marked_positive_handles=marked_affiliate_handles,
                        marked_negative_handles=(marked_unrelated_handles
                                                 + marked_official_handles))
                    combined_affiliate_results.append(affiliate_counts)

                official_metrics = MetricCalculator.fold_metrics(
                    combined_official_results)
                affiliate_metrics = MetricCalculator.fold_metrics(
                    combined_affiliate_results)

                result = {
                    'official': official_metrics,
                    'affiliate': affiliate_metrics
                }
                classification_results[classifier_name].append(result)

            fold += 1

        return classification_results
Code example #50
File: evaluator.py  Project: WING-NUS/corpSearch
    def evaluate_statistical(self):
        """Evaluates the system using 10-fold cross validation, returning
        a dictionary containing the number of correct results per-fold in
        each class."""
        trainer = Trainer(self.profiles, self.profile_type,
                          self.converter, self.network)
        training_set = trainer.generate_training_set()

        profiles = numpy.array(list(self.profiles))
        data = numpy.array(training_set.data)
        labels = numpy.array(training_set.labels)

        fold_iterator = cross_validation.StratifiedKFold(labels,
                                                         n_folds=10,
                                                         shuffle=True,
                                                         random_state=42)

        official_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                x['posts']))
                                  for x in self.profiles if x['label'] == 2)
        affiliate_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                 x['posts']))
                                   for x in self.profiles if x['label'] == 1)

        official_profiles = defaultdict(list)
        for name, profile in official_profile_pairs:
            official_profiles[name].append(profile)

        affiliate_profiles = defaultdict(list)
        for name, profile in affiliate_profile_pairs:
            affiliate_profiles[name].append(profile)

        fold = 1
        # This assumes we're just using Random Forest (i.e. one classifier)
        # Ugly hack for now.
        classification_results = {
            'official_correct': [],
            'affiliate_correct': []
        }
        for train, test in fold_iterator:
            classifiers = initialize_classifiers()

            training_data = data[train]
            training_labels = labels[train]

            test_set = itertools.compress(profiles[test], labels[test])
            company_names = set(x['name'] for x in test_set)
            print 'Test set', fold, '-', len(company_names), 'companies.'

            for classifier in classifiers:
                classifier_name = classifier['type']
                c = classifier['classifier']
                trained = c.fit(training_data, training_labels)

                system = SingleNetworkSearcher(
                    classifier=trained,
                    searchengine=self.search_engine,
                    profile_converter=self.converter,
                    network=self.network)

                number_of_workers = int(multiprocessing.cpu_count() * 0.75)
                worker_pool = ProcessingPool(number_of_workers)
                all_results = worker_pool.map(system.query, company_names)

                for idx, name in enumerate(company_names):
                    official_results = official_profiles[name]
                    affiliate_results = affiliate_profiles[name]

                    results = all_results[idx]
                    classified_official = results.official
                    classified_affiliate = results.affiliate

                    marked_official_handles = [x['profile'].handle.lower()
                                               for x in classified_official]
                    marked_affiliate_handles = [x['profile'].handle.lower()
                                                for x in classified_affiliate]

                    official_handles = [x.handle.lower()
                                        for x in official_results]
                    affiliate_handles = [x.handle.lower()
                                         for x in affiliate_results]

                    official_correct = 0
                    for handle in marked_official_handles:
                        if handle in official_handles:
                            official_correct += 1

                    affiliate_correct = 0
                    for handle in marked_affiliate_handles:
                        if handle in affiliate_handles:
                            affiliate_correct += 1

                    classification_results['official_correct'].append(official_correct)
                    classification_results['affiliate_correct'].append(affiliate_correct)

            fold += 1

        return classification_results
Code example #51
File: imgTest.py  Project: sburck11/blobCreator
			filterOn=True
			flatBG=True
			touchingEdge=False
			sigma=random.randint(160, 225)
			shaderSigma=random.randint(15, 20)
			minSize=random.randint(500, 750)
			maxSize=1000
			blobThresh=random.randint(150, 180)
			innerThresh=random.randint(10, 25)
			# name='B2_'+str(i)
		name='B_'+str(i)
	else:
		# GRADE A
		filterOn=True
		flatBG=True
		numBlob=1
		touchingEdge=True
		sigma=120
		shaderSigma=random.randint(15, 20)
		minSize=random.randint(500, 750)
		maxSize=random.randint(1000, 2000)
		blobThresh=random.randint(180, 250)
		innerThresh=random.randint(10, 20)
		name='A_'+str(i)

	testImage=Blob(numBlob, minSize, maxSize, blobThresh, innerThresh,
		sigma, shaderSigma, path, betweenBlobs, touchingEdge, flatBG, filterOn, addColors, name)
	imgArr.append(testImage)

pool.map(Blob.makeImg, imgArr)
Code example #52
File: test_mpmap2.py  Project: WarrenWeckesser/pathos
#!/usr/bin/env python

from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool
pool = Pool()
tpool = TPool()

def host(id):
    import socket
    return "Rank: %d -- %s" % (id, socket.gethostname())


print "Evaluate 10 items on 1 proc"
pool.ncpus = 1
res3 = pool.map(host, range(10))
print pool
print '\n'.join(res3)
print ''

print "Evaluate 10 items on 2 proc"
pool.ncpus = 2
res5 = pool.map(host, range(10))
print pool
print '\n'.join(res5)
print ''

print "Evaluate 10 items on ? proc"
pool.ncpus = None
res7 = pool.map(host, range(10)) 
print pool
print '\n'.join(res7)
Code example #53
File: encode_processor.py  Project: saketkc/moca_web
    def parallel_motif_analysis(self, samples_dirs):
        pool = ProcessingPool(nodes=16)
        pool.map(self.sample_motif_analysis, tuple(samples_dirs))
Code example #54
File: encode_processor.py  Project: saketkc/moca_web
    def analyse_samples_parallely(self, samples_dirs):
        pool = ProcessingPool(nodes=15)
        pool.map(self.sample_analysis, tuple(samples_dirs))
Code example #55
    def applay_parallel_ransac(self):
        sample_indices = [i for i in xrange(25)]
        pool = Pool()
        output = pool.map(self.calculate_distance, sample_indices)
        return output
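Handing a bound method such as self.calculate_distance straight to pool.map is one common reason these projects use pathos. A minimal self-contained sketch of the same pattern, with the class and data below being illustrative rather than taken from the original project:

from pathos.multiprocessing import ProcessingPool as Pool

class Distances(object):
    def __init__(self, points):
        self.points = points

    def calculate_distance(self, i):
        # toy per-sample computation: distance of point i from the mean
        mean = sum(self.points) / float(len(self.points))
        return abs(self.points[i] - mean)

d = Distances([3.0, 7.0, 1.0, 9.0])
print(Pool(2).map(d.calculate_distance, range(4)))  # [2.0, 2.0, 4.0, 4.0]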
Code example #56

def genseq(idx):
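    # builds one synthetic listening sequence: the first artist is drawn from the
    # global popularity distribution pops, subsequent ones from draw(), and the
    # result is pickled as '<idx>.pkl'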

    first = np.where(np.random.multinomial(1,pvals=pops)==1)[0][0]
    last = first
    last_ts = datetime.now()
    result = {'artist_idx':[first],'ts':[last_ts]}
    for i in xrange(seq_length-1):
        next_listen = draw(last)
        last = next_listen
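        # gap between listens: a two-minute bin drawn from td, plus uniform jitter (in seconds) within the bin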
        gap_bin = 120*np.where(np.random.multinomial(1,pvals=td)==1)[0][0]
        gap = np.random.randint(gap_bin,gap_bin+120)
        result['artist_idx'].append(next_listen)
        new_ts = last_ts+timedelta(0,gap)
        result['ts'].append(new_ts)
        last_ts = new_ts

    df = pd.DataFrame(result)
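    # label consecutive plays of the same artist with a shared block id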
    df['block'] = ((df['artist_idx'].shift(1) != df['artist_idx']).astype(int).cumsum())-1
    df.to_pickle(str(idx)+'.pkl')
    logging.info('idx {} complete'.format(idx))

pool = Pool(cpu_count())
indices = range(n)
pool.map(genseq,indices)
pool.close()



Code example #57
File: test_mpmap3.py  Project: briandrawert/pathos
from pathos.multiprocessing import ProcessingPool as Pool
from pathos.multiprocessing import ThreadingPool as TPool
pool = Pool()
tpool = TPool()

def adder(augend):
  zero = [0]
  def inner(addend):
    return addend+augend+zero[0]
  return inner

# build from inner function
add_me = adder(5)

# build from lambda functions
squ = lambda x:x**2

# test 'dilled' multiprocessing for inner
print "Evaluate 10 items on 2 proc:"
pool.ncpus = 2
print pool
print pool.map(add_me, range(10))
print ''

# test 'dilled' multiprocessing for lambda
print "Evaluate 10 items on 4 proc:"
pool.ncpus = 4
print pool
print pool.map(squ, range(10))
print ''

# test for lambda, but with threads
print "Evaluate 10 items on 4 threads:"
tpool.nthreads = 4
print tpool
print tpool.map(squ, range(10))
print ''
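Note that both the closure returned by adder and the bare lambda squ would fail under the standard multiprocessing pool, since pickle cannot serialise nested functions or lambdas; dill, which pathos uses for serialisation, handles them, and that is exactly what this test exercises.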
Code example #58
File: test_map.py  Project: tufla/pathos
    from pathos.pp import ParallelPythonPool as PPP
    #from pathos.pp import stats
    pp_pool = PPP(4, servers=('localhost:5653','localhost:2414'))
    print pp_pool
    start = time.time()
    res = pp_pool.map(busy_add, _x, _y, _d)
    print "time to queue:", time.time() - start
    start = time.time()
    _pp_pool = list(res)
    print "time to results:", time.time() - start
    #print stats()

    assert _basic == _pp_pool
    print ""

    from pathos.multiprocessing import ProcessingPool as MPP
    mp_pool = MPP(4)
    print mp_pool
    start = time.time()
    res = mp_pool.map(busy_add, _x, _y, _d)
    print "time to queue:", time.time() - start
    start = time.time()
    _mp_pool = list(res)
    print "time to results:", time.time() - start

    assert _basic == _mp_pool
    print ""


# EOF
Code example #59
    # Creates a worker pool from the given command-line parameter. If the
    # parameter is too large, all detectable CPUs will be utilised; if it is
    # nonsense, only 1 core will be utilised.
    workers = 1
    if len(sys.argv) >= 2 and sys.argv[1].isdigit() and int(sys.argv[1]) > 0:
        workers = cpu_count()
        if int(sys.argv[1]) <= workers:
            workers = int(sys.argv[1])
    
    print 'N:  ' + str(N)
    print 'PW: ' + str(workers)
    sleep(3)  # a three-second pause so the printed input can be read again

    # All the magic happens here:
    pool = ProcessingPool(workers)
    Ys = pool.map(steadyState,y0)   

    clock = time()-clock # elapsed time
    print 'Seconds: ' + str(clock) # Not essential but useful.

    # Serialisation of results and stats:
    ss = {'STrange': STrange, 'PFDrange': PFDrange, 'Ys': Ys, 'Sec': clock, 'PoolWorkers': workers}
    output = open('steadyStateAnalysisFixedST_MC_N' + str(N) + '.pkl', 'wb')
    dill.dump(ss,output,2)
    output.close()

else:
    print('Well, something went wrong.')
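Reading the serialised results back is symmetric; a minimal sketch, assuming the same filename convention and an illustrative value of N:

import dill

# the N in the filename must match the value used by the run above (10 here is illustrative)
with open('steadyStateAnalysisFixedST_MC_N10.pkl', 'rb') as infile:
    results = dill.load(infile)
print(results['PoolWorkers'], results['Sec'])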

#================================================================= #
# 3D plotting routine to obtain the figure as in Ebenhoeh et al. 2014  #
Code example #60
File: parallel.py  Project: kezilu/pextant
from pathos.multiprocessing import ProcessingPool as Pool
x = [1,2,3]
y = [1,2,3]
class Test(object):
    def __init__(self, c):
        self.c = c
    def plus(self, x, y):
        return self.c + x+y

if __name__ == '__main__':
    p = Pool(4)
    t = Test(5)
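    # each worker evaluates t.plus(x_i, y_i) = 5 + x_i + y_i, so out == [7, 9, 11]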
    out = p.map(t.plus, x, y)
    print out