def main(datadir, convert_dir, crop_size):
    try:
        os.mkdir(convert_dir)
    except OSError:
        pass

    filenames = data_util.get_image_files(datadir)

    print('Resizing images in {} to {}'.format(datadir, convert_dir))

    n = len(filenames)

    batch_size = 500
    batches = n // batch_size + 1
    p = Pool()

    args = []

    for f in filenames:
        args.append((convert_size, (datadir, convert_dir, f, crop_size)))

    for i in range(batches):
        print('batch {:>2} / {}'.format(i + 1, batches))
        p.map(convert, args[i * batch_size : (i + 1) * batch_size])

    p.close()
    p.join()
    print('Done')
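
# A minimal alternative sketch of the batching above: Pool.map already splits its
# iterable into chunks, so the manual batch loop can be replaced by a single call
# with an explicit chunksize. The worker and task list below are illustrative
# stand-ins, not the convert/args objects from the snippet above.
from multiprocessing.pool import Pool

def _convert_one(task):
    # illustrative worker: unpack one task tuple and pretend to process it
    datadir, convert_dir, filename, crop_size = task
    return filename

if __name__ == '__main__':
    tasks = [('in', 'out', 'img_{}.png'.format(i), 256) for i in range(2000)]
    p = Pool()
    p.map(_convert_one, tasks, chunksize=500)  # chunksize replaces the manual batches
    p.close()
    p.join()
    print('Done')
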
def main_mh():
    samples_dir_p = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Samples")

    out_dir_root = Path("/RECH2/huziy/MH_streamflows/")


    if samples_dir_p.name.lower() == "samples":
        out_folder_name = samples_dir_p.parent.name
    else:
        out_folder_name = samples_dir_p.name


    varnames = ["STFA", ]

    # ======================================

    out_dir_p = out_dir_root.joinpath(out_folder_name)

    if not out_dir_p.is_dir():
        out_dir_p.mkdir(parents=True)


    inputs = []
    for y in range(1981, 2010):
        inputs.append(dict(year=y, varnames=varnames, samples_dir=samples_dir_p, out_dir=out_dir_p, target_freq_hours=24))

    # Extract the data for each year in parallel
    pool = Pool(processes=3)
    pool.map(extract_data_for_year_in_parallel, inputs)
def run_parallel(num_processes, experiment_names, methods, sparsity_factors, run_ids):
    """
    Run multiple experiments in parallel.

    Parameters
    ----------
    num_processes : int
        The maximum number of processes that can run concurrently.
    experiment_names : list of str
        The names of experiments to run.
    methods : list of str
        The methods to run the experiments under (mix1, mix2, or full).
    sparsity_factors : list of float
        The sparsity of inducing points to run the experiments at.
    run_ids : list of int
        The ids of the configurations under which to run the experiments.
    """
    # Setup an array of individual experiment configurations.
    experiment_configs = []
    for experiment in experiment_names:
        for method in methods:
            for sparsity_factor in sparsity_factors:
                for run_id in run_ids:
                    experiment_configs.append({'experiment_name': experiment,
                                               'method': method,
                                               'sparsity_factor': sparsity_factor,
                                               'run_id': run_id})

    # Now run the experiments.
    pool = Pool(num_processes)
    pool.map(run_config, experiment_configs)
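
# A sketch of the same Cartesian expansion using itertools.product, assuming the
# run_config worker from the snippet above; it is functionally equivalent to the
# nested loops, just more compact.
from itertools import product
from multiprocessing.pool import Pool

def run_parallel_product(num_processes, experiment_names, methods, sparsity_factors, run_ids):
    # One config dict per (experiment, method, sparsity_factor, run_id) combination.
    experiment_configs = [
        {'experiment_name': e, 'method': m, 'sparsity_factor': s, 'run_id': r}
        for e, m, s, r in product(experiment_names, methods, sparsity_factors, run_ids)
    ]
    pool = Pool(num_processes)
    try:
        pool.map(run_config, experiment_configs)  # run_config as defined elsewhere in this module
    finally:
        pool.close()
        pool.join()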
Example #4
class Pool(object):
  '''
  A process pool that prefers an MPI pool (``MPIPool``) and falls back to a
  standard multiprocessing pool (``MultiPool``) when MPI is unavailable.
  '''
  def __init__(self, **pool_kwargs):
  
    try:
      kw = KwargsCheck(MPIPool, pool_kwargs)
      self._pool = MPIPool(**kw)
      self.MPI = True
    except (ImportError, ValueError):
      kw = KwargsCheck(MultiPool, pool_kwargs)
      self._pool = MultiPool(**kw)
      self.MPI = False
    
    if self.MPI:
      if not self._pool.is_master():
        self._pool.wait()
        sys.exit(0)
  
  def map(self, f, x, args = (), kwargs = {}): 
    '''
    Map the function ``f`` over the sequence ``x``, optionally wrapping it with
    extra positional and keyword arguments before dispatching to the pool.
    '''
    if len(args) or len(kwargs):
      w = wrap(f, *args, **kwargs)  
      return self._pool.map(w, x)
    else:
      return self._pool.map(f, x)
  
  def close(self):
    self._pool.close()
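
# A minimal usage sketch for the wrapper class above, assuming MPIPool, MultiPool,
# KwargsCheck and wrap are importable as in the surrounding module. The wrapper
# falls back to multiprocessing when MPI is unavailable, so the same script can be
# run with or without mpiexec.

def _square(x):
    # toy worker; defined at module level so either backend can pickle it
    return x * x

if __name__ == '__main__':
    pool = Pool()                       # the wrapper class defined above
    print(pool.map(_square, range(10)))
    pool.close()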
    
Example #5
def main_canesm2_rcp85():
    samples_dir_p = Path("/RECH/data/Simulations/CRCM5/North_America/NorthAmerica_0.44deg_CanRCP85_B1/Samples")

    out_dir_root = Path("/RECH2/huziy/BenAlaya/")


    if samples_dir_p.name.lower() == "samples":
        out_folder_name = samples_dir_p.parent.name
    else:
        out_folder_name = samples_dir_p.name


    varnames = ["PR", integrated_wv_RPN_name]  # Total precipitation m/s; integrated ice, liquid water and vapor (in kg/m**2) averaged over last MOYHR

    # ======================================

    out_dir_p = out_dir_root.joinpath(out_folder_name)

    if not out_dir_p.is_dir():
        out_dir_p.mkdir()


    inputs = []
    for y in range(2006, 2101):
        inputs.append(dict(year=y, varnames=varnames, samples_dir=samples_dir_p, out_dir=out_dir_p, target_freq_hours=6, calendar_str="365_day"))

    # Extract the data for each year in parallel
    pool = Pool(processes=3)
    pool.map(extract_data_for_year_in_parallel, inputs)
Example #6
    def run_parallel(n_process):
        """
        Creates a process for each element in the array returned by ``get_configs()`` and runs the experiment
        corresponding to each element. The maximum number of processes to run in parallel is determined by ``n_process``.
        """

        p = Pool(n_process)
        p.map(run_config, ExperimentRunner.get_configs())
def main():
    # update_item_list(SQL_USER, SQL_PASS, SQL_DATABASE)
    engine = create_engine('mysql+mysqlconnector://%s:%s@localhost/%s' % (SQL_USER, SQL_PASS, SQL_DATABASE))
    region_id = 10000002
    item_id_list = [int(index) for (index, row) in pd.read_sql_table('items', engine, index_col='item_id').iterrows()]
    data_write = partial(update_price_data, region_id)
    p = Pool(initializer=init_function, initargs=(SQL_USER, SQL_PASS, SQL_DATABASE))
    p.map(data_write, item_id_list)
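
# A hypothetical sketch of what the init_function passed to Pool(initializer=...)
# above might look like; the module-level `engine` global and its exact setup are
# assumptions, not taken from the source. Each worker process builds its own engine
# so database connections are never shared between processes.
from sqlalchemy import create_engine

engine = None

def init_function(sql_user, sql_pass, sql_database):
    # runs once in every worker process started by Pool(initializer=..., initargs=...)
    global engine
    engine = create_engine('mysql+mysqlconnector://%s:%s@localhost/%s' % (sql_user, sql_pass, sql_database))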
def main_crcm5_nemo():
    label = "CRCM5_NEMO"

    period = Period(
        datetime(1980, 1, 1), datetime(2015, 12, 31)
    )


    pool = Pool(processes=10)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]


        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }



        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({
            default_varname_mappings.SNOWFALL_RATE: "SN"
        })

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/snow3/huziy/NEI/GL/erai0.75deg_driven/GL_with_NEMO_dtN_1h_and_30min/Samples",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: default_varname_mappings.vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config, period=current_month_period, months_of_interest=current_month_period.months_of_interest, nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def main_crcm5_hl():
    label = "CRCM5_HL"

    period = Period(
        datetime(1980, 1, 1), datetime(2009, 12, 31)
    )


    pool = Pool(processes=12)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]


        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = {}
        vname_map.update(vname_map_CRCM5)
        vname_map.update({
            default_varname_mappings.SNOWFALL_RATE: "U3"
        })

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}_monthly".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config,
            period=current_month_period,
            months_of_interest=current_month_period.months_of_interest,
            nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def main_future(nprocs=20):

    period = Period(
        datetime(2079, 1, 1), datetime(2100, 12, 31)
    )

    label = "CRCM5_NEMO_fix_TT_PR_CanESM2_RCP85_{}-{}_monthly".format(period.start.year, period.end.year)

    vname_to_level_erai = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
    }

    base_folder = "/scratch/huziy/Output/GL_CC_CanESM2_RCP85/coupled-GL-future_CanESM2/Samples"

    vname_map = {}
    vname_map.update(vname_map_CRCM5)
    # vname_map[default_varname_mappings.SNOWFALL_RATE] = "SN"
    vname_map[default_varname_mappings.SNOWFALL_RATE] = "XXX"



    pool = Pool(processes=nprocs)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]

        label_to_config = OrderedDict([(
            label, {
                # "base_folder": "/HOME/huziy/skynet3_rech1/CRCM5_outputs/cc_canesm2_rcp85_gl/coupled-GL-future_CanESM2/Samples",
                DataManager.SP_BASE_FOLDER: base_folder,
                DataManager.SP_DATASOURCE_TYPE: data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config, period=current_month_period, months_of_interest=current_month_period.months_of_interest, nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
Example #11
def launchCMAESForAllTargetSizesMulti():
    '''
    Launch the CMA-ES optimization for each target size in parallel (on different processors)
    '''
    # initialize setup variables
    rs = ReadSetupFile()
    # initialize a pool of workers, i.e. multiprocessing
    p = Pool()
    # run CMA-ES for each target size on a separate processor
    p.map(launchCMAESForSpecificTargetSize, rs.sizeOfTarget, "theta")
Example #12
def get_word():
    domains=open('dic/newwords').readlines()
    try:
        pool=Pool(processes=2)
        pool.map(check_domain,domains)
        pool.close()
        pool.join()
    except Exception as e:
        print(e)
        pass
    def run(self, test_name=None, db_adapter=None):

        if db_adapter is None:
            db_adapter = DEFAULT_DATABASE_ADAPTER
        if test_name is None:
            test_name = '_'.join([db_adapter, datetime.datetime.now().strftime("%Y-%m-%d %H:%M")])

        print ''.join(['Running "', test_name, '" test'])
        print 'Prepare database'

        adapter = adapter_factory(db_adapter)
        adapter.prepare_db()
        test_id = adapter.create_new_test(test_name)

        print ''
        print 'Create user documents'

        pool = Pool(processes=10)
        params = [{'user_id': i, 'docs_per_user': DOCS_PER_USER, 'db_adapter': db_adapter}
                  for i in range(1, USERS_COUNT + 1)]

        start = time.time()
        try:
            pool.map(create_users, params)
            print 'Full time:', time.time() - start
        finally:
            pool.terminate()
        del pool

        print 'OK! Users were created!'
        print ''

        for i in range(1, MAX_PROCESSES + 1):
            print 'Run test with %d processes' % i
            pool = Pool(processes=i)
            params = [{'user_id': j, 'db_adapter': db_adapter} for j in range(1, USERS_COUNT + 1)]
            start = time.time()
                
            try:
                res = pool.map(update_users, params)
                full_time = time.time() - start
            finally:
                pool.terminate()
            del pool

            print 'Test is finished! Save results'
            print ''

            adapter.save_results(test_id, res, i)

            print 'Full time:', full_time
            print ''

        print 'Finish!'
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    download = partial(download_link, download_dir)
    p = Pool(8)
    p.map(download, links)
    print('Took {}s'.format(time() - ts))
Example #15
def validate_series(yaml_file, sequence_dictionary):
    """
    :param yaml_file: The mdl yaml file.
    :param sequence_dictionary: Dictionary of sequences
    :return: None. Runs a large number of sequence tests on the series to make sure
        the sequences for each protein match the given sequence and the series itself.
    """
    yaml_file = load_yaml_file(yaml_file)
    p = Pool(cpu_count())
    jobs = [(yaml_file, protein, sequence_dictionary) for protein in yaml_file["protein_list"]]
    p.map(_validate_protein, jobs)

    return
def main_obs():
    label = "Obs_monthly_icefix_test2_1proc_speedtest_3"


    period = Period(
        datetime(1980, 1, 1), datetime(2010, 12, 31)
    )


    pool = Pool(processes=20)

    input_params = []
    for month_start in period.range("months"):

        month_end = month_start.add(months=1).subtract(seconds=1)

        current_month_period = Period(month_start, month_end)
        current_month_period.months_of_interest = [month_start.month, ]


        vname_to_level_erai = {
            T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
            U_WE: VerticalLevel(1, level_kinds.HYBRID),
            V_SN: VerticalLevel(1, level_kinds.HYBRID),
        }

        vname_map = {}
        vname_map.update(vname_map_CRCM5)

        label_to_config = OrderedDict([(
            label, {
                DataManager.SP_BASE_FOLDER: "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix",
                DataManager.SP_DATASOURCE_TYPE: data_source_types.ALL_VARS_IN_A_FOLDER_IN_NETCDF_FILES_OPEN_EACH_FILE_SEPARATELY,
                DataManager.SP_INTERNAL_TO_INPUT_VNAME_MAPPING: vname_map,
                DataManager.SP_LEVEL_MAPPING: vname_to_level_erai,
                DataManager.SP_OFFSET_MAPPING: vname_to_offset_CRCM5,
                DataManager.SP_MULTIPLIER_MAPPING: vname_to_multiplier_CRCM5,
                DataManager.SP_VARNAME_TO_FILENAME_PREFIX_MAPPING: vname_to_fname_prefix_CRCM5,
                "out_folder": "lake_effect_analysis_daily_{}_{}-{}".format(label, period.start.year, period.end.year)
            }
        )])

        kwargs = dict(
            label_to_config=label_to_config, period=current_month_period, months_of_interest=current_month_period.months_of_interest, nprocs_to_use=1
        )

        print(current_month_period.months_of_interest)
        input_params.append(kwargs)

    # execute in parallel
    pool.map(monthly_func, input_params)
def main():

	# catch parameters
	segmentation_base_string = sys.argv[1]
	ground_truth_base_string = sys.argv[2]
	mask_file_base_string = sys.argv[3]
	cases = sys.argv[4:]

	# evaluate each case and collect the scores
	hds = []
	assds = []
	precisions = []
	recalls = []
	dcs = []

	# load images and apply mask to segmentation and ground truth (to remove ground truth fg outside of brain mask)
	splush = [load(segmentation_base_string.format(case)) for case in cases]
	tplush = [load(ground_truth_base_string.format(case)) for case in cases]
	masks = [load(mask_file_base_string.format(case))[0].astype(numpy.bool) for case in cases]

	s = [s.astype(numpy.bool) & m for (s, _), m in zip(splush, masks)]
	t = [t.astype(numpy.bool) & m for (t, _), m in zip(tplush, masks)]
	hs = [h for _, h in splush]
	ht = [h for _, h in tplush]

	# compute and append metrics (Pool-processing)
	pool = Pool(n_jobs)
	dcs = pool.map(wdc, zip(t, s))
	precisions = pool.map(wprecision, zip(s, t))
	recalls = pool.map(wrecall, zip(s, t))
	hds = pool.map(whd, zip(t, s, [header.get_pixel_spacing(h) for h in ht]))
	assds = pool.map(wassd, zip(t, s, [header.get_pixel_spacing(h) for h in ht]))

	# print case-wise results
	print 'Metrics:'
	print 'Case\tDC[0,1]\tHD(mm)\tP2C(mm)\tprec.\trecall'
	for case, _dc, _hd, _assd, _pr, _rc in zip(cases, dcs, hds, assds, precisions, recalls):
		print '{}\t{:>3,.3f}\t{:>4,.3f}\t{:>4,.3f}\t{:>3,.3f}\t{:>3,.3f}'.format(case, _dc, _hd, _assd, _pr, _rc)

	# check for nan/inf values of failed cases and signal warning
	mask = numpy.isfinite(hds)
	if not numpy.all(mask):
		print 'WARNING: Average values only computed on {} of {} cases!'.format(numpy.count_nonzero(mask), mask.size)
		
	print 'DM  average\t{} +/- {} (Median: {})'.format(numpy.asarray(dcs)[mask].mean(), numpy.asarray(dcs)[mask].std(), numpy.median(numpy.asarray(dcs)[mask]))
	print 'HD  average\t{} +/- {} (Median: {})'.format(numpy.asarray(hds)[mask].mean(), numpy.asarray(hds)[mask].std(), numpy.median(numpy.asarray(hds)[mask]))
	print 'ASSD average\t{} +/- {} (Median: {})'.format(numpy.asarray(assds)[mask].mean(), numpy.asarray(assds)[mask].std(), numpy.median(numpy.asarray(assds)[mask]))
	print 'Prec. average\t{} +/- {} (Median: {})'.format(numpy.asarray(precisions)[mask].mean(), numpy.asarray(precisions)[mask].std(), numpy.median(numpy.asarray(precisions)[mask]))
	print 'Rec. average\t{} +/- {} (Median: {})'.format(numpy.asarray(recalls)[mask].mean(), numpy.asarray(recalls)[mask].std(), numpy.median(numpy.asarray(recalls)[mask]))
Example #18
def main():
    parser = argparse.ArgumentParser(description='portScan.py')
    parser.add_argument("-ip", dest="ip", help="ip to scan")
    parser.add_argument("-i", dest="iniPort", help="initial port ")
    parser.add_argument("-e", dest="endPort", help="end port")

    params = parser.parse_args()

    ip = str(params.ip)

    print('[+] Scanned IP -> ' + ip)
    print('\t' + str(params.iniPort) + ' - ' + str(params.endPort))

    p = Pool(50)
    p.map(scan, range(int(params.iniPort), int(params.endPort)))
Example #19
def runoff_to_netcdf_parallel(indir, outdir):
    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    in_names = [x for x in os.listdir(indir) if x.startswith("pm") and x.endswith("p")]

    in_paths = [os.path.join(indir, name) for name in in_names]

    out_paths = [os.path.join(outdir, inName + ".nc") for inName in in_names]

    ppool = Pool(processes=10)
    print("The paths below go to: ")
    print(in_paths[0])
    print("Go into: {}".format(out_paths[0]))
    ppool.map(extract_runoff_to_nc_process, list(zip(in_paths, out_paths)))
Example #20
def main():
    opts, args = parse_options()

    datasets = build_datasets(opts, args[0])
    sources = glob(args[1])

    stime = time()

    pool = Pool(opts.jobs)

    pool.map(partial(job, opts, datasets), sources)

    cputime = clock()
    duration = time() - stime

    opts.verbose and log("Processed in {0:0.2f}s using {1:0.2f}s of CPU time.", duration, cputime)
Example #21
    def update(self, export='csv'):
        """ 更新已经下载的历史数据
        :param export: 历史数据的导出方式,目前支持持 csv
        :return:
        """
        stock_codes = []
        for file in os.listdir(self.raw_path):
            if not file.endswith('.json'):
                continue
            stock_code = file[:6]
            stock_codes.append(stock_code)

        pool = Pool(10)
        func = partial(self.update_single_code)
        if export.lower() in ['csv']:
            pool.map(func, stock_codes)
def get_correlation_parallel(s1,s2):
    """
    params s1 - series 1
    params s2 - series 2
    NOTE: series are numbered 1 to 25 when given as arguments
    returns the correlation between the series
    """
    start = time.time()
    offsets = [] #this will be the arguments to all the parallel jobs
    instances = (MAX_ROWS/BATCH_SIZE)
    mean,std = calculate_mean_std_parallel()
    stripped_mean,stripped_std = calculate_stripped_mean_std_parallel(mean,std)
    processes = Pool(processes=instances)
    for i in range(instances):
        offsets.append((s1,s2,mean,std,stripped_mean,stripped_std,i*BATCH_SIZE))
    results = processes.map(get_correlation,offsets)
    processes.close()
    processes.join()
    pearson_corr = 0
    total = 0
    for result in results:
        pearson_corr += result[0]*result[1]
        total += result[1]
    pearson_corr = 1.0*pearson_corr / total
    t_value = abs(pearson_corr*math.sqrt( 1.0*(total - 2) / ( 1 - (pearson_corr*pearson_corr))))
    p_value = t.sf(t_value,total-2)
    print "\n ######### CORRELATION BETWEEN SERIES ",s1," AND SERIES ",s2, " is ",pearson_corr , "t value is ", t_value ," and p value is ", p_value,  "######### \n" 
    end = time.time()
    print "EXECUTION TIME : ", end-start , " sec"
    return pearson_corr
Example #23
class Runner(DatabaseRunner):
	""" Class for running algorithms against test images """

	def __init__(self, algorithm, domain, arguments=None):
		"""
		The domain dictates which images to use as sources. The limit is an
		optional maximum number of images to use as sources.  If random, images
		will be pulled in random order, up to limit; otherwise, images will be
		pulled in sequential order.  Tags are used to control the image selection
		further.
		"""
		DatabaseRunner.__init__(self, algorithm, arguments)
		self._domain = domain
		self._image_id = None
		if kMaxWorkers > 1:
			self._pool = Pool(int(config.get('global', 'max_workers')))

	def run(self):
		""" Runs the algorithm on the images matching the supplied arguments """
		self._logger.debug('Fetching image IDs from database')
		if self._arguments.image_id:
			self._image_id = self._arguments.image_id
			images = (self._database_mapper.get_image_for_analysis(self._domain, self._image_id), )
		else:
			images = self._database_mapper.get_images_for_analysis(self._domain, self._arguments.limit, self._arguments.random, self._arguments.tags_require, self._arguments.tags_exclude)
		self._logger.debug('Processing {0} images'.format(len(images)))
		if kMaxWorkers > 1:
			return self.evaluate(self._pool.map(self._algorithm.apply, images))
		else:
			return self.evaluate(map(self._algorithm.apply, images))
def _get_data(data_folder = "data/crcm4_data", v_name = "pcp",
              member_list = None, year_range = None, months = None):
    """
    returns seasonal means of each year for all members in the list
    Note!: uses caching
    """
    year_range = list(year_range)
    cache_file = "_".join(member_list) + "_" + "_".join(map(str, months)) + \
                 "_{0}_from_{1}_to_{2}_cache.bin".format(v_name, year_range[0], year_range[-1])



    if os.path.isfile(cache_file):
        return pickle.load(open(cache_file))

    p = Pool(processes=len(member_list))

    #prepare input for the parallel processes
    m_folders = map(lambda x: os.path.join(data_folder,"{0}_p1{1}".format(x, v_name)), member_list)
    year_ranges = [year_range] * len(member_list)
    months_for_p = [months] * len(member_list)
    #calculate means
    result = p.map(_get_annual_means_for_year_range_p, zip(m_folders, year_ranges, months_for_p))

    result = np.concatenate(result, axis = 0) #shape = (n_members * len(year_range)) x nx x ny
    print result.shape

    pickle.dump(result, open(cache_file, "w"))
    return result
Example #25
 def check(self, artdict):
     print("Checking for infobox existence")
     pool = Pool(processes=100)
     revs = []
     for a in artdict:
         rev = artdict[a]["Revision"].split('oldid=')[1].strip()
         revs.append((a, rev))
     texts = dict(pool.map(self.get_text, revs))
     for a in artdict:
         text = texts[a]
         if text is None:
             artdict[a]["MultiInfobox"] = 0
             artdict[a]["Infobox programming language"] = -1
             artdict[a]["Infobox software"] = -1
             artdict[a]["Infobox file format"] = -1
         else:
             if 'infobox programming language' in text.lower():
                 artdict[a]["Infobox programming language"] = text.lower().index('infobox programming language')
             else:
                 artdict[a]["Infobox programming language"] = -1
             if 'infobox software' in text.lower():
                 artdict[a]["Infobox software"] = text.lower().index('infobox software')
             else:
                 artdict[a]["Infobox software"] = -1
             if 'infobox file format' in text.lower():
                 artdict[a]["Infobox file format"] = text.lower().index('infobox file format')
             else:
                 artdict[a]["Infobox file format"] = -1
             artdict[a]["MultiInfobox"] = text.lower().count("{{infobox")
     return artdict
Example #26
    def sum_lines(self, SPEEDUP=True):
        filesname = []
        for item_dir in self.dirlist.keys():
            for item_file in self.dirlist[item_dir][1]:
                filesname.append(item_dir + '/' + item_file)

        if SPEEDUP:
            # when the python version is less than 3.3, multiprocessing.pool.Pool
            # doesn't support the context-management protocol
            if sys.version_info.major == 3 and sys.version_info.minor >= 3:
                with Pool(self.MAX_RES) as res_pool:
                    return reduce(self._adder, res_pool.map(self._count_filelines, filesname))
            else:
                # in python 2.x (and maybe python 3.[0-2]),
                # multiprocessing must pickle objects to send them between processes,
                # and bound methods are not picklable.
                # the workaround (whether you consider it "easy" or not ;-) is to
                # add the infrastructure needed to make such methods picklable,
                # registering them with the copy_reg standard library module.
                # the module-level function used below is a simpler way to make
                # this work in python 2.x
                res_pool = Pool(processes=self.MAX_RES)
                retval = res_pool.map(_filecounter, filesname)
                return reduce(self._adder, retval)
        else:
            for filename in filesname:
                with open(filename, 'rb') as filebuf:
                    self.filesline += len(filebuf.readlines())
            return self.filesline
Example #27
def parallel_main():
    recs = sys.stdin.readlines()
    vals = [int(rec) for rec in recs]
    p = Pool()
    results = p.map(solve, vals)
    for v1, v2 in results:
        print("{} {}".format(v1, v2))
Example #28
def test_stemming():
    with open("tests.txt") as file:
        pool = Pool(4)
        results = pool.map(validate, file)
    for result in results:
        if result:
            yield assert_output, result[0], result[1]
Example #29
def parallel_main():
    recs = iter(sys.stdin.readlines())
    cuts_list = []
    cuts_list_append = cuts_list.append
    cuts = []
    cuts_extend = cuts.extend
    cuts_append = cuts.append
    cuts_clear = cuts.clear

    while True:
        # length of stick
        L = int(next(recs))
        if L == 0:
            break

        # number of cut
        n_cut = int(next(recs))
        # cutting points
        cuts_clear()
        cuts_append(0)
        cuts_extend(list(map(int, next(recs).split())))
        cuts_append(L)
        cuts_list_append(cuts[:])

    p = Pool(4)
    results = p.map(min_cut, cuts_list)
    for res in results:
        print(res)
def get_urls1():
	f2 = open('app_links.txt','r')

	nprocs = 100 # nprocs is the number of processes to run
	ParsePool = Pool(nprocs)
	#ParsePool.map(btl_test,url)
	ParsedURLS = ParsePool.map(urlsDeatilsExtract,f2)
Example #31
gain_above_dict = {
    float(key): value
    for key, value in gain_above_dict.iteritems()
}
gain_below_dict = {
    float(key): value
    for key, value in gain_below_dict.iteritems()
}

# This configuration of the multiprocessing call is necessary for passing multiple arguments to the main function
# It is based on the example here: http://spencerimp.blogspot.com/2015/12/python-multiprocess-with-multiple.html
# Ran with 16 processors on r4.16xlarge
num_of_processes = 16
pool = Pool(num_of_processes)
pool.map(
    partial(annual_gain_rate_mangrove.annual_gain_rate,
            gain_above_dict=gain_above_dict,
            gain_below_dict=gain_below_dict), mangrove_ecozone_list)
pool.close()
pool.join()
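
# A minimal, self-contained sketch of the partial-based pattern described in the
# comment above: only the tile id varies across the mapped iterable, while the two
# dictionaries are fixed keyword arguments. The worker and values here are
# illustrative stand-ins, not the annual_gain_rate function from the source.
from functools import partial
from multiprocessing.pool import Pool

def process_tile(tile_id, gain_above_dict=None, gain_below_dict=None):
    # illustrative worker that uses the shared dictionaries and the per-tile id
    return tile_id, len(gain_above_dict), len(gain_below_dict)

if __name__ == '__main__':
    worker = partial(process_tile,
                     gain_above_dict={1.0: 'a'},
                     gain_below_dict={2.0: 'b'})
    pool = Pool(4)
    print(pool.map(worker, ['tile_1', 'tile_2']))
    pool.close()
    pool.join()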

# # For single processor use
# for tile in mangrove_ecozone_list:
#
#     annual_gain_rate_mangrove.annual_gain_rate(tile, gain_table_dict)

print "Tiles processed. Uploading to s3 now..."

uu.upload_final_set(cn.annual_gain_AGB_mangrove_dir,
                    cn.pattern_annual_gain_AGB_mangrove)
uu.upload_final_set(cn.annual_gain_BGB_mangrove_dir,
                    cn.pattern_annual_gain_BGB_mangrove)
Example #32
def ensemble(training_output_folder1, training_output_folder2, output_folder,
             task, validation_folder, folds):
    print("\nEnsembling folders\n", training_output_folder1, "\n",
          training_output_folder2)

    output_folder_base = output_folder
    output_folder = join(output_folder_base, "ensembled_raw")

    # only_keep_largest_connected_component is the same for all stages
    dataset_directory = join(preprocessing_output_dir, task)
    plans = load_pickle(join(training_output_folder1,
                             "plans.pkl"))  # we need this only for the labels

    files1 = []
    files2 = []
    property_files = []
    out_files = []
    gt_segmentations = []

    folder_with_gt_segs = join(dataset_directory, "gt_segmentations")

    for f in folds:
        validation_folder_net1 = join(training_output_folder1, "fold_%d" % f,
                                      validation_folder)
        validation_folder_net2 = join(training_output_folder2, "fold_%d" % f,
                                      validation_folder)
        patient_identifiers1 = subfiles(validation_folder_net1, False, None,
                                        'npz', True)
        patient_identifiers2 = subfiles(validation_folder_net2, False, None,
                                        'npz', True)
        # we don't do postprocessing anymore so there should not be any of that noPostProcess
        patient_identifiers1_nii = [
            i for i in subfiles(validation_folder_net1,
                                False,
                                None,
                                suffix='nii.gz',
                                sort=True)
            if not i.endswith("noPostProcess.nii.gz")
            and not i.endswith('_postprocessed.nii.gz')
        ]
        patient_identifiers2_nii = [
            i for i in subfiles(validation_folder_net2,
                                False,
                                None,
                                suffix='nii.gz',
                                sort=True)
            if not i.endswith("noPostProcess.nii.gz")
            and not i.endswith('_postprocessed.nii.gz')
        ]
        assert len(patient_identifiers1) == len(
            patient_identifiers1_nii
        ), "npz seem to be missing. run validation with --npz"
        assert len(patient_identifiers2) == len(
            patient_identifiers2_nii
        ), "npz seem to be missing. run validation with --npz"
        assert all([
            i[:-4] == j[:-7]
            for i, j in zip(patient_identifiers1, patient_identifiers1_nii)
        ]), "npz seem to be missing. run validation with --npz"
        assert all([
            i[:-4] == j[:-7]
            for i, j in zip(patient_identifiers2, patient_identifiers2_nii)
        ]), "npz seem to be missing. run validation with --npz"

        all_patient_identifiers = patient_identifiers1
        for p in patient_identifiers2:
            if p not in all_patient_identifiers:
                all_patient_identifiers.append(p)

        # assert these patients exist for both methods
        assert all([
            isfile(join(validation_folder_net1, i))
            for i in all_patient_identifiers
        ])
        assert all([
            isfile(join(validation_folder_net2, i))
            for i in all_patient_identifiers
        ])

        maybe_mkdir_p(output_folder)

        for p in all_patient_identifiers:
            files1.append(join(validation_folder_net1, p))
            files2.append(join(validation_folder_net2, p))
            property_files.append(join(validation_folder_net1, p)[:-3] + "pkl")
            out_files.append(join(output_folder, p[:-4] + ".nii.gz"))
            gt_segmentations.append(
                join(folder_with_gt_segs, p[:-4] + ".nii.gz"))

    p = Pool(default_num_threads)
    p.map(merge, zip(files1, files2, property_files, out_files))
    p.close()
    p.join()

    if not isfile(join(output_folder, "summary.json")) and len(out_files) > 0:
        aggregate_scores(tuple(zip(out_files, gt_segmentations)),
                         labels=plans['all_classes'],
                         json_output_file=join(output_folder, "summary.json"),
                         json_task=task,
                         json_name=task + "__" +
                         output_folder_base.split("/")[-1],
                         num_threads=default_num_threads)

    if not isfile(join(output_folder_base, "postprocessing.json")):
        determine_postprocessing(output_folder_base,
                                 folder_with_gt_segs,
                                 "ensembled_raw",
                                 "temp",
                                 "ensembled_postprocessed",
                                 default_num_threads,
                                 dice_threshold=0)

        out_dir_all_json = join(network_training_output_dir, "summary_jsons")
        json_out = load_json(
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"))

        json_out["experiment_name"] = output_folder_base.split("/")[-1]
        save_json(
            json_out,
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"))

        maybe_mkdir_p(out_dir_all_json)
        shutil.copy(
            join(output_folder_base, "ensembled_postprocessed",
                 "summary.json"),
            join(out_dir_all_json,
                 "%s__%s.json" % (task, output_folder_base.split("/")[-1])))
Example #33
        pages = soups.find('div', attrs={
            'class': 'pagenavi'
        }).find_all('span')[-2].getText()
        dirname = u'[{}P] {}'.format(int(pages), title)
        if not os.path.exists(dirname):
            os.mkdir(dirname)
        for page in range(1, int(pages) + 1):
            each_pic = detail_link + '/' + str(page)
            picture = requests.get(each_pic, headers=headers).content
            pic_html = BeautifulSoup(picture, 'lxml')
            img = pic_html.find('div', attrs={
                'class': 'main-image'
            }).find('img')['src']
            filename = '%s/%s/%s.jpg' % (os.path.abspath('.'), dirname, n)
            print(u'Downloading images: %s, picture %s' % (dirname, n))
            try:
                with open(filename, 'wb+') as jpg:
                    jpg.write(requests.get(img, headers=headers).content)
                n += 1
                time.sleep(1)
            except:
                pass


if __name__ == '__main__':
    pool = Pool(10)
    page = [x for x in range(1, 154)]
    pool.map(get_pic, page)
    pool.close()
    pool.join()
Example #34
    except Exception:
        print('Failed to save to MongoDB', result)


# Get the detail-page urls for a given brand
def get_all_urls(brand):
    detail_urls = []
    total, urls = search(brand)
    total = int(total)
    detail_urls.extend(urls)
    for i in range(2, 2 + 1):
        result = next_page(i)
        detail_urls.extend(result)
        # break
    return detail_urls


def main(brand):
    urls = get_all_urls(brand)
    for url in set(urls):
        url = 'http:' + url
        get_products(url)


if __name__ == '__main__':
    brands = []
    pool = Pool(processes=3)  # set the number of processes in the pool
    pool.map(main, brands)  # apply main to every item in the list
    pool.close()  # no more tasks will be submitted to the pool
    pool.join()
    # main()
Example #35
 def parallel_pair_e_it(self, chunk_it):
     p = Pool(4)
     for chunk in chunk_it:
         for pair_e in p.map(self.mod_pairs, chunk):
             yield pair_e
     p.close()
Example #36
        json_latest = self.get_latest_page(offset)
        print(len(json_latest))
        json_byte = self.get_byte_page(offset)

        latest_records = self.get_futurism_infos(json_latest)
        self.save_info_Mysql(latest_records)
        # print(latest_records)

        print('==========' * 10)
        byte_records = self.get_futurism_infos(json_byte)
        self.save_info_Mysql(byte_records)
        # print(byte_records)

    def run(self, offset):
        self.start(offset)


# download in parallel with a process pool
if __name__ == '__main__':
    # main(1)
    fs = Futurism_Spider()
    # fs.run(1)
    pool = Pool()
    groups = ([
        x * fs.OFF_SET for x in range(fs.GROUP_START, fs.GROUP_END + 1)
    ])
    pool.map(fs.run, groups)
    pool.close()
    pool.join()
Example #37
		luckey_count = 0
		for i in range(*interval):
			if sum_ok(i, int(n/2)):
				luckey_count += 1
		return luckey_count

if __name__ == "__main__":
	
	import time
	from multiprocessing.pool import Pool
	
	parts = 50
	workers = 8
	
	# start of calculation
	start = time.time()

	part = 10**n / parts
	incremet_sum = 0
	
	intervals = []
	for i in range(parts):
		intervals.append((incremet_sum, incremet_sum + part))
		incremet_sum += part
	
	pool = Pool(workers)
	luckey_count = pool.map(count_on_interval, intervals)
	print luckey_count

	print 'Entire job took:', time.time() - start, 'probability:', (1.0 * sum(luckey_count)) / 10**n
Example #38
def run_cv(f, n_proc):
    p = Pool(n_proc)
    p.map(f, range(len(configs)))
    p.close()  # no more tasks
    p.join()  # wrap up current tasks
    def check_words(self, is_sorted=False, is_reversed=False):
        # Split the long string into separate strings, and make some IDs.
        words = list([w for w in LONG_TEXT[:100].split(' ') if w])

        print("Adding words: {}".format(words))

        # Avoid adding the same string twice (or a prefix of a previous string).
        #  - because it's a current problem unless we append string IDs, which makes things too slow
        # words = set(words)
        # words = [w for w in words if 0 != sum([x.startswith(w) for x in words if x != w])]

        assert words

        # Make a string ID for each string.
        strings = {}
        for string in words:
            string_id = uuid.uuid4().hex
            strings[string_id] = string

        # Create a new suffix tree.
        self.app = SuffixTreeApplicationWithCassandra()
        st = self.app.register_new_suffix_tree()
        assert st.id in self.app.suffix_tree_repo

        # Close the app, so the pool doesn't inherit it.
        self.app.close()

        # Start the pool.
        pool = Pool(initializer=pool_initializer, processes=1)

        words = [[s, sid, st.id] for sid, s in strings.items() if s]

        if is_sorted:
            words = sorted(words)
        if is_reversed:
            words = reversed(words)

        results = pool.map(add_string_to_suffix_tree, words)
        for result in results:
            if isinstance(result, Exception):
                print(result.args[0][1])
                raise result

        # Create the app again.
        self.app = SuffixTreeApplicationWithCassandra()

        errors = []

        # Check the suffix tree returns string ID for all substrings of string.
        for string_id, string in strings.items():
            # Check all prefixes and suffixes.
            substrings = sorted(list(get_all_substrings(string)))
            print("")
            print("Checking for all substrings of string '{}': {}".format(
                repr(string), " ".join([repr(s) for s in substrings])))
            for substring in substrings:
                results = self.app.find_string_ids(substring, st.id)
                if string_id not in results:
                    msg = "Not found: substring '{}' from string '{}'".format(
                        repr(substring), repr(string))
                    print(msg)
                    errors.append(msg)

        # Check for errors.
        self.assertFalse(errors, "\n".join(errors))
        dst_image_path = os.path.join('./data/image', name)
        dst_mask_path = os.path.join('./data/mask', name)
        try:
            img = imread(image_path)
            img, mask = preprocess(img)
            img = cv.resize(img, dsize)
            mask = cv.resize(mask, dsize)
            imwrite(dst_image_path, img)
            imwrite(dst_mask_path, mask)
        except:
            print(image_path)
            continue


if __name__ == "__main__":

    image_list = glob.glob(os.path.join('./data/sample', '*.jpeg'))

    patches = 16
    patch_len = int(len(image_list) / patches)
    filesPatchList = []
    for i in range(patches - 1):
        fileList = image_list[i * patch_len:(i + 1) * patch_len]
        filesPatchList.append(fileList)
    filesPatchList.append(image_list[(patches - 1) * patch_len:])

    # multiple processes
    pool = Pool(patches)
    pool.map(process, filesPatchList)
    pool.close()
Example #41
                with open('{}\\{}.jpg'.format(save_path, pic_name),
                          'ab+') as pic_write:
                    pic_write.write(resp.content)
        except requests.ConnectionError:
            print("{}\n{}无法获取此图片,保存失败!".format(file_title, url))


def main(offset):
    json = get_json_data(offset)
    for info in get_pic_url(json):
        if info is not None:
            save_pic(info['title'], info['url'])
        else:
            print('offset={}, all street-snap images have been fetched'.format(offset))
            break
        break


if __name__ == '__main__':
    start_time = time.time()
    INIT_PAGE = 50
    offset_list = ([x * 20 for x in range(0, INIT_PAGE)])
    pool = Pool()
    pool.map(main, offset_list)
    pool.close()
    pool.join()
    used_time = time.time() - start_time
    print("图片保存路径:", os.getcwd())
    print('耗费时间:', used_time)
# 不加多线程时间 181.3823745250702
# 多线程:耗费时间: 59.359395027160645
Example #42
        detail = dl.xpath('.//text()')
        detail = str(''.join(detail)).replace('\xa0', '').strip()
        infos.append(detail)
        #print(detail)
    save('\n'.join(infos))


def get_pages(url):  # index page
    response = requests.get(url, headers=headers)
    # print(response.text)
    selector = etree.HTML(response.text)
    items = selector.xpath('//div[@class="city_spots_list"]/ul//li')
    for item in items:
        # get the detail-page url
        href = item.xpath('./a/@href')[0]
        #print(href)
        res = get_detail(href)
        paser_pages(res)


if __name__ == '__main__':
    # crawl pages in parallel with a process pool
    page_href = [
        'https://yancheng.cncn.com/jingdian/1-{}-0-0.html'.format(str(i))
        for i in range(1, 6)
    ]
    pool = Pool()
    result = pool.map(get_pages, page_href)
    pool.close()
    pool.join()
            print(f"downloading {centre.upper()} {var_name} hindcasts for year {year}")

            c.retrieve(
                'seasonal-monthly-single-levels',
                {
                    'originating_centre':centre,
                    'system':system,
                    'variable':var_dict[var_name],
                    'product_type':'monthly_mean',
                    'year':str(year),
                    'month':[
                        '01','02','03',
                        '04','05','06',
                        '07','08','09',
                        '10','11','12'
                    ],
                    'leadtime_month':[
                        '2','3','4',
                        '5','6'
                    ],
                    'format':'grib'
                },
                f'{str(dpath)}/{centre.upper()}_system_{system}_{var_name}_{year}.grib')


# initialise a Pool with the

p = Pool(workers)

p.map(fetch_hindcast, list(range(workers)))
Example #44
    site = 'http://www.mmjpg.com/mm/'
    mm_url = site + str(num)
    print(mm_url)
    count = get_mmurl_count(mm_url)
    print(count)
    title = get_mmurl_title(mm_url)
    print(title)
    mmurl_t = get_mmurl_t(mm_url)
    # print(mmurl_t)
    path = 'D:\爬虫\图片下载\\' + title
    if os.path.isdir(path):
        pass
    else:
        os.mkdir(path)
    for i in range(1, count + 1):
        imgs = mmurl_t + str(i) + '.jpg'
        # print(imgs)
        with open(path + '\\' + str((1000 + i)) + '.jpg', 'wb') as f:
            f.write(requests.get(imgs, headers=header2).content)


if __name__ == '__main__':
    s = time.time()
    # for i in range(1242,1256):
    #     main(i)
    # main(1242,1256)
    pool = Pool()
    pool.map(main, [i for i in range(1242, 1256)])

    e = time.time()
    print(e - s)
Example #45
def processResultPath(new_base, resultAndPath):
    result, path = resultAndPath
    mean, stderr, count = result

    # sampled_mean = windowAverage(mean)
    # sampled_stderr = windowAverage(stderr)

    sampled_mean = everyN(mean, EVERY)
    sampled_stderr = everyN(stderr, EVERY)

    sampled = [sampled_mean, sampled_stderr, count]

    new_path = new_base + '/' + rest(path)
    os.makedirs(up(new_path), exist_ok=True)

    np.save(new_path, sampled)


if __name__ == '__main__':
    pool = Pool()

    new_base = sys.argv[1]
    exp_paths = sys.argv[2:]
    for exp_path in exp_paths:
        exp = loadExperiment(exp_path)

        result_paths = listResultsPaths(exp)
        results = map(loadResults, result_paths)

        pool.map(partial(processResultPath, new_base), results)
        os.makedirs('result\\'+item.get('title'))
    try:
        response = requests.get(item.get('image_url'))
        if response.status_code == 200:
            file_path = 'result\\{0}/{1}.{2}'.format(item.get('title'), md5(response.content).hexdigest(), 'jpg')
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    f.write(response.content)
            else:
                print('Already Downloaded', file_path)
    except requests.ConnectionError:
        print('Failed to save image!')

from multiprocessing.pool import Pool

def main(offset):
    json = get_one_page(offset)
    for item in get_images(json):
        print(item)
        save_image(item)

GROUP_START = 1
GROUP_END = 5

if __name__ == '__main__':
    pool = Pool()
    groups = ([x * 20 for x in range(GROUP_START, GROUP_END+1)])
    pool.map(main, groups)
    pool.close()
    pool.join()
    doc = pq(html)
    items = doc('.excerpt-c5').items()

    for item in items:
        title = item.find('h2').text()
        photo_url = item.find('.thumbnail > img').attr('data-src')
        photo_page_url = item.find('h2 > a').attr('href')

        print(title, photo_url)


if __name__ == '__main__':
    start_time = time.time()
    num_cpus = multiprocessing.cpu_count()
    print('Number of processes to launch:', num_cpus)
    pool = Pool(num_cpus)
    url_list = []

    for i in range(1, 10):
        url = 'https://www.lovemmtu.net/page/{}'.format(i)
        url_list.append(url)
        # spider(url)

    # print(get_text('https://www.lovemmtu.net/page/4'))
    pool.map(spider, url_list)
    pool.close()
    pool.join()

    end_time = time.time()
    print('Elapsed time: {}s'.format((end_time - start_time)))
 flag1 = 0
 para_space = []
 while flag1 <= S_0.shape[0] - 1:
     flag2 = 0
     while flag2 <= S_index.shape[0] - 1:
         flag3 = 0
         while flag3 <= B_0.shape[0] - 1:
             flag4 = 0
             while flag4 <= B_index.shape[0] - 1:
                 flag5 = 0
                 while flag5 <= M_vir_0.shape[0] - 1:
                     para_arr = np.array([
                         S_0[flag1], S_index[flag2], B_0[flag3],
                         B_index[flag4], M_vir_0[flag5]
                     ])
                     para_space.append(para_arr)
                     flag5 += 1
                 flag4 += 1
             flag3 += 1
         flag2 += 1
     flag1 += 1
 Time2 = time.time()
 print("Sequential execution time:", Time2 - Time1)
 pool = Pool(50)
 hh = pool.map(Func, para_space)
 pool.close()
 pool.join()
 print(hh)
 Time3 = time.time()
 print("Parallel execution time:", Time3 - Time2)
 print("The total time:", Time3 - Time1)
Example #49
def add_travel_time_dir(graph_dir,
                        mask_dir,
                        conv_dict,
                        graph_dir_out,
                        min_z=128,
                        dx=4,
                        dy=4,
                        percentile=90,
                        max_speed_band=-2,
                        use_weighted_mean=True,
                        variable_edge_speed=False,
                        mask_prefix='',
                        save_shapefiles=True,
                        n_threads=12,
                        verbose=False):
    '''Update graph properties to include travel time for entire directory'''
    t0 = time.time()
    pickle_protocol = 4  # 4 is most recent, python 2.7 can't read 4

    logger1.info("Updating graph properties to include travel time")
    logger1.info("  Writing to: " + str(graph_dir_out))
    os.makedirs(graph_dir_out, exist_ok=True)

    image_names = sorted(
        [z for z in os.listdir(mask_dir) if z.endswith('.tif')])
    nfiles = len(image_names)
    n_threads = min(n_threads, nfiles)

    params = []
    for i, image_name in enumerate(image_names):
        im_root = image_name.split('.')[0]
        if len(mask_prefix) > 0:
            im_root = im_root.split(mask_prefix)[-1]
        out_file = os.path.join(graph_dir_out, im_root + '.gpickle')

        if (i % 1) == 0:
            logger1.info("\n" + str(i + 1) + " / " + str(len(image_names)) +
                         " " + image_name + " " + im_root)
        mask_path = os.path.join(mask_dir, image_name)
        graph_path = os.path.join(graph_dir, im_root + '.gpickle')

        if not os.path.exists(graph_path):
            # print("  ", str(i), "DNE, skipping: " + str(graph_path))
            logger1.info("  " + str(i) + "DNE, skipping: " + str(graph_path))
            # return
            continue

        if verbose:
            logger1.info("mask_path: " + mask_path)
            logger1.info("graph_path: " + graph_path)

        mask = skimage.io.imread(mask_path)
        G_raw = nx.read_gpickle(graph_path)

        # see if it's empty
        if len(G_raw.nodes()) == 0:
            nx.write_gpickle(G_raw, out_file, protocol=pickle_protocol)
            continue

        params.append((G_raw, mask, conv_dict, min_z, dx, dy, \
                      percentile, \
                      max_speed_band, use_weighted_mean, \
                      variable_edge_speed, \
                      verbose, \
                      out_file,
                      save_shapefiles, im_root, graph_dir_out))

    # execute
    if n_threads > 1:
        pool = Pool(n_threads)
        pool.map(infer_travel_time, params)
    else:
        infer_travel_time(params[0])

    tf = time.time()
    print("Time to infer speed:", tf - t0, "seconds")
    return
Example #50
                author = item.find('.author')
                showContent = item.find('.show-content')
                content = str(htmlTitle + author + showContent)
                save_to_cvs(title, content)
        return None
    except RequestException:
        print('Pagination request failed', link)
        return parse_page(link)
    except TimeoutError:
        return parse_page(link)


def save_to_cvs(title, content):
    file_path = '{0}/{1}.{2}'.format('D:\Text1', title, 'html')
    if not os.path.exists(file_path):
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
            print(title, '- download complete')


def main(i):
    pages = get_links(str(i))
    for page in pages:
        parse_page(page)


if __name__ == '__main__':
    total = int(1416 / 9 + 2)
    pool = Pool()
    pool.map(main, [i for i in range(total)])
Example #51
            for i in range(len(header)):
                d[header[i]] = data[i]
            d['cn_name'] = sohu_seeds_collect.get_stock_info(code=code)
            d['code'] = code
            data_list.append(SoHuStock.create(**d, should_save=False))
    logger.info('BATCH DONE')
    return data_list


if __name__ == '__main__':
    batches = []
    batch_size = 100
    batch = []
    codes = sohu_seeds_collect.data().keys()
    for code in codes:
        batch.append(code)
        if len(batch) == batch_size:
            batches.append(batch)
            batch = []

    p = Pool(32)
    data_list = p.map(task, batches)
    all = []
    for data in data_list:
        all.extend(data)

    logger.info('Start writing {} data to disk ...'.format(len(all)))
    SoHuStock.save_many(_deduplicate(all))

    logger.info('DONE')
Example #52
parser = argparse.ArgumentParser()
parser.add_argument(
    "--graph_dir", type=str, help="input graph dir", default="data/graphs/vecroad_4/graphs_junc/"
)
parser.add_argument(
    "--save_dir", type=str, help="save wkt dir", default="data/graphs/vecroad_4/graphs_junc_wkt/"
)

args = parser.parse_args()

os.makedirs(args.save_dir, exist_ok=True)

def worker(f):
    print(f)
    name = f.split('.')[0]
    g = graph_helper.read_graph(os.path.join(args.graph_dir, f))
    g = g.clear_self()
    wkt = g.convert_rs_to_wkt()
    all_data = []
    for linestring in wkt:
        all_data.append(("AOI_0_{}_img0".format(name), linestring))
    df = pd.DataFrame(all_data, columns=['ImageId', 'WKT_Pix'])
    df.to_csv(os.path.join(args.save_dir, name + '.csv'), index=False)

files = os.listdir(args.graph_dir)
pool = Pool()
pool.map(worker, files)
pool.close()
pool.join()
Example #53
def parallel_score_it(chunk_it, score_f, ncpus=2):
    p = Pool(ncpus)
    for chunk in chunk_it:
        for score in p.map(score_f, chunk):
            yield score
    p.close()
Example #54
def daily_photo(request):  # 爬虫
    class Glob:
        def __init__(self):
            self.i = 1

    GL = Glob()

    headers = {
        'Host': 'wall.alphacoders.com',
        'Referer': 'https://wall.alphacoders.com/by_favorites.php?quickload=807801&page=1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest'
    }

    def get_page(page):
        params = {
            # 'search':'fantasy+tiger',
            'quickload': '807801',
            'page': page,
        }
        base_url = 'https://wall.alphacoders.com/featured.php?'
        url = base_url + urlencode(params)
        try:
            resp = requests.get(url, headers=headers)
            print(url)
            # print('ok')
            if resp.status_code == 200:
                return resp.text
        except requests.ConnectionError:
            print('Page request failed:', url)
            return None

    def get_imageurl(html):
        # Extract image URLs of the form <scheme>://images...jpg
        result = re.findall(r'[a-zA-Z]+://images[^\s]+jpg', html)
        return result

    def main(page):
        GL.i = 0
        html = get_page(page)
        result = get_imageurl(html)
        # One sub-directory per listing page.
        dir_name = os.path.join('D:\\Project\\Myblog\\mysite\\blog\\static\\img', '{}-page'.format(page))
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        for url in result:
            file_path = os.path.join(dir_name, '{}-{}.jpg'.format(page, GL.i))
            with open(file_path, 'wb') as f:
                # Swap the thumbnail width (3xx/4xx) for the full 1920px version.
                full_url = re.sub(r'-+[3-4]\d{2}-', '-1920-', url)
                print(full_url)
                image = requests.get(full_url)
                f.write(image.content)
                GL.i += 1

    # This code runs inside a Django view, so __name__ is never '__main__' here;
    # the original guard made the crawl dead code, so it has been dropped.
    pool = Pool()
    scope = list(range(1, 31))
    pool.map(main, scope)
    pool.close()
    pool.join()

    return render(request, 'daily_photo.html')
Beispiel #55
0
def perform_match(s_idx):
    global s_kp, s_des, pw_kp, pw_des
    print('s_idx', s_idx)
    matches = [match(s_kp[s_idx], s_des[s_idx], pw_kp[pw_idx], pw_des[pw_idx]) for pw_idx in range(5)]

def test():
    s_idx = 0
    return [match_v3(s_kp[s_idx], s_des[s_idx], pw_kp[pw_idx], pw_des[pw_idx]) for pw_idx in range(pw_kp.shape[0])]

def perform_match_v3(s_idx):
    global s_kp, s_des, pw_kp, pw_des
    print('s_idx', s_idx)
    # matches = (match(s_kp[s_idx], s_des[s_idx], pw_kp[pw_idx], pw_des[pw_idx]) for pw_idx in range(5))
    # matches = (match_v2(s_kp[s_idx], s_des[s_idx], pw_kp[pw_idx], pw_des[pw_idx]) for pw_idx in range(5))
    # Use a list comprehension: the original generator expression was never consumed,
    # so none of the matching work would actually run inside the worker.
    matches = [match_v3(s_kp[s_idx], s_des[s_idx], pw_kp[pw_idx], pw_des[pw_idx]) for pw_idx in range(pw_kp.shape[0])]
    # np.savez_compressed(str(s_idx) + '-M', m=np.asarray(matches))

if __name__ == '__main__':
    print('Begin pool work')
    pool = Pool()
    # s_idx = range(2)
    s_idx = range(s_kp.shape[0])
    time_start = timer()
    # pool.map(perform_match, s_idx)
    # pool.map(perform_match_v2, s_idx)
    pool.map(perform_match_v3, s_idx)
    time_end = timer()
    pool.close()
    pool.join()
    duration = time_end - time_start
    print("Program took %.3fs" % duration)
Beispiel #56
0
def write_to_elastic(all_images):

    p = Pool(processes=10)
    result = p.map(store_image, all_images)
    p.close()
    p.join()
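# Sketch of the store_image callable this snippet assumes (not from the original
# source). It indexes one image document into Elasticsearch through the plain REST
# API; the host, index name and document fields are hypothetical placeholders.
import requests

def store_image_sketch(image):
    doc = {'path': image.get('path'), 'tags': image.get('tags', [])}  # assumed fields
    resp = requests.post('http://localhost:9200/images/_doc', json=doc)
    return resp.status_code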
Beispiel #57
0
            if not os.path.exists(file):
                with open(file, 'wb') as f:
                    f.write(response.content)
            else:
                return ('Already downloaded', file)
    except requests.ConnectionError:
        print('Failed to save image')


def main(offset):
    json_data = get_page(offset)
    for item in get_image(json_data):
        save_image(item)
    return 'Page {} saved successfully'.format(offset // 20)


if __name__ == '__main__':
    print('Program Start')
    #    offset = 10
    #    main(offset)
    GROUP_START = 1
    GROUP_END = 5
    pool = Pool()
    groups = ([x * 20 for x in range(GROUP_START, GROUP_END + 1)])
    results = pool.map(main, groups)
    for result in results:
        print(result)
    pool.close()
    pool.join()
    print('Program End')
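# The get_page/get_image helpers used above are not shown in this snippet. The sketch
# below is an assumption based only on how they are called: get_page fetches one page
# of an offset-paginated JSON listing and get_image yields per-image dicts from it.
# The URL, query parameters and JSON field names are hypothetical placeholders.
import requests

def get_page_sketch(offset):
    params = {'offset': offset, 'count': 20}  # assumed pagination parameters
    try:
        resp = requests.get('http://example.com/api/images', params=params)
        if resp.status_code == 200:
            return resp.json()
    except requests.ConnectionError:
        pass
    return None

def get_image_sketch(json_data):
    if json_data and json_data.get('data'):  # assumed response layout
        for item in json_data['data']:
            for image in item.get('image_list', []):
                yield {'image': image.get('url'), 'title': item.get('title')}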
Beispiel #58
0
    def _read_obs(self, stns_ids=None):

        # Saw extreme decreased performance due to garbage collection when
        # pandas ran checks for a chained assignment. Turn off this check
        # temporarily.
        opt_val = pd.get_option('mode.chained_assignment')
        pd.set_option('mode.chained_assignment', None)

        try:

            if stns_ids is None:
                stns_obs = self.stns
            else:
                stns_obs = self.stns.loc[stns_ids]

            nstns = len(stns_obs.station_id)
            nprocs = self.nprocs if nstns >= self.nprocs else nstns

            if self.has_start_end_dates:
                start_date = self.start_date
                end_date = self.end_date
            else:
                start_date = None
                end_date = None

            iter_stns = [(row[1], start_date, end_date, self.elems,
                          self.min_hrly_for_dly)
                         for row in stns_obs.iterrows()]

            if nprocs > 1:

                # http://stackoverflow.com/questions/24171725/
                # scikit-learn-multicore-attributeerror-stdin-instance-
                # has-no-attribute-close
                if not hasattr(sys.stdin, 'close'):

                    def dummy_close():
                        pass

                    sys.stdin.close = dummy_close

                pool = Pool(processes=nprocs,
                            initializer=_init_worker,
                            initargs=[_download_obs])
                obs_all = pool.map(_download_obs, iter_stns, chunksize=1)
                pool.close()
                pool.join()

            else:

                obs_all = []

                _init_worker(_download_obs)

                for a_stn in iter_stns:

                    obs_stn = _download_obs(a_stn)
                    obs_all.append(obs_stn)

                _download_obs.ftp.close()

            try:
                obs_all = pd.concat(obs_all, ignore_index=True)
            except ValueError:
                # No valid observations
                obs_all = pd.DataFrame({
                    'station_id': [],
                    'elem': [],
                    'time': [],
                    'obs_value': []
                })

        finally:

            pd.set_option('mode.chained_assignment', opt_val)

        obs_all = obs_all.set_index(['station_id', 'elem', 'time'])
        # DataFrame.sortlevel() was removed in modern pandas; sort_index() is the replacement.
        obs_all = obs_all.sort_index(level=0, sort_remaining=True)

        return obs_all
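# Sketch (not from the original source) of the _init_worker initializer this method
# relies on. Based on how it is used above -- it receives the worker function, and the
# sequential branch later calls _download_obs.ftp.close() -- it presumably opens a
# per-process connection and attaches it to that function. The FTP host is a placeholder.
from ftplib import FTP

def _init_worker_sketch(worker_func, host='ftp.example.org'):
    # Give each worker process its own FTP connection, reused across stations.
    worker_func.ftp = FTP(host)
    worker_func.ftp.login()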
Beispiel #59
0
                                             md5(r.content).hexdigest(),
                                             'jpg')  # this is just the file path; the directory is not created here
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    f.write(r.content)
            else:
                print('Already downloaded', file_path)
    except requests.ConnectionError as e:
        print('Failed to download', e)


def main(offset):
    json_data = get_page(offset)
    if json_data is None:
        return None
    for item in get_image(json_data):  # get_image yields the parsed image items one by one
        save_images(item)


# Crawl 20 pages of content
GROUP_START = 1
GROUP_END = 20
if __name__ == '__main__':
    # Use a process pool to crawl the pages concurrently
    pool = Pool()
    groups = [x * 20 for x in range(GROUP_START, GROUP_END + 1)]  # the offset argument for each page
    pool.map(main, groups)  # hand the offsets to the pool
    pool.close()
    pool.join()
    print('All downloaded!')
Beispiel #60
0
from multiprocessing import Pool  # assumed import (not shown in the truncated snippet)


def is_prime(num):
    for i in range(2, int(num**0.5 + 1)):
        if num % i == 0:
            return False
    return True


def is_concatenate(num1, num2):
    if is_prime(int(str(num1) + str(num2))) and is_prime(
            int(str(num2) + str(num1))):
        return 1
    return 0
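# cal_prime is not defined in this snippet. Judging from how its result is used below
# (None entries are filtered out and the survivors are printed as primes), it presumably
# maps a number to itself when prime and to None otherwise; this sketch encodes that
# assumption rather than the original implementation.
def cal_prime(num):
    return num if is_prime(num) else None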


p = Pool(processes=16)
num_range = range(2, 10**6)
prime_list = p.map(cal_prime, num_range)
p.close()
p.join()
prime_list_clear = [x for x in prime_list if x is not None]
print(prime_list_clear)

check_list = []

for i in prime_list_clear:
    print(i)
    check_list.append([i])
    for x in check_list:
        check_s = 0
        for j in range(len(x)):
            check_s += is_concatenate(i, x[j])
        if check_s == len(x):