def __wake_cpus(self, cpus):
    # Requires Python 3.3+. This will "tickle" each CPU to force it to
    # update its idle counters.
    if hasattr(os, 'sched_setaffinity'):
        pid = self.__gettid()
        save_affinity = os.sched_getaffinity(pid)
        for idx in cpus:
            os.sched_setaffinity(pid, [idx])
            os.sched_getaffinity(pid)
        os.sched_setaffinity(pid, save_affinity)
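# Below is a minimal, hypothetical sketch of the same save-pin-restore affinity
# pattern as a reusable context manager; the name `pinned_to_cpu` and its usage
# are illustrative additions, not part of the snippet above.
import contextlib
import os


@contextlib.contextmanager
def pinned_to_cpu(cpu, pid=0):
    """Temporarily pin the calling process to a single CPU, then restore."""
    saved = os.sched_getaffinity(pid)      # remember the current mask
    os.sched_setaffinity(pid, {cpu})       # pin to one CPU
    try:
        yield
    finally:
        os.sched_setaffinity(pid, saved)   # always restore the old mask


# Example: briefly touch every allowed CPU, much like __wake_cpus above.
# for cpu in sorted(os.sched_getaffinity(0)):
#     with pinned_to_cpu(cpu):
#         pass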
def threads_to_use():
    """Returns the number of cores we are allowed to run on"""
    if hasattr(os, 'sched_getaffinity'):
        cores = len(os.sched_getaffinity(0))
    else:
        cores = os.cpu_count()
    return min(_max_threads, cores)
def _worker_count():
    cpu_count = 1
    try:
        cpu_count = len(os.sched_getaffinity(0))
    except AttributeError:
        cpu_count = multiprocessing.cpu_count()
    return cpu_count
def test_set_affinity_and_check(self):
    os.sched_setaffinity(0, [0, 1])
    s_ = os.sched_getaffinity(0)
    self.assertTrue(0 in s_)
    self.assertTrue(1 in s_)

    os.sched_setaffinity(0, [0])
    s_ = os.sched_getaffinity(0)
    self.assertTrue(0 in s_)
    self.assertFalse(1 in s_)

    for i_ in range(affinity.NO_OF_CPU):
        os.sched_setaffinity(0, [i_])
        s_ = os.sched_getaffinity(0)
        self.assertTrue(i_ in s_)

    os.sched_setaffinity(0, [0, 1])
    # os.sched_getaffinity() returns a set, so compare against a set
    self.assertEqual({0, 1}, os.sched_getaffinity(0))
def _get_cpu_affinity():
    if hasattr(os, 'sched_getaffinity'):
        return os.sched_getaffinity(0)
    if psutil is not None:
        proc = psutil.Process()
        # cpu_affinity() is only available on Linux, Windows and FreeBSD
        if hasattr(proc, 'cpu_affinity'):
            return proc.cpu_affinity()
    return None
def cpu_count():
    """Return the number of CPUs the current process can use.

    The returned number of CPUs accounts for:
     * the number of CPUs in the system, as given by
       ``multiprocessing.cpu_count``;
     * the CPU affinity settings of the current process
       (available with Python 3.4+ on some Unix systems);
     * CFS scheduler CPU bandwidth limit (available on Linux only, typically
       set by docker and similar container orchestration systems);
     * the value of the LOKY_MAX_CPU_COUNT environment variable if defined.
    and is given as the minimum of these constraints.
    It is also always larger or equal to 1.
    """
    import math

    try:
        cpu_count_mp = mp.cpu_count()
    except NotImplementedError:
        cpu_count_mp = 1

    # Number of available CPUs given affinity settings
    cpu_count_affinity = cpu_count_mp
    if hasattr(os, 'sched_getaffinity'):
        try:
            cpu_count_affinity = len(os.sched_getaffinity(0))
        except NotImplementedError:
            pass

    # CFS scheduler CPU bandwidth limit
    # available in Linux since 2.6 kernel
    cpu_count_cfs = cpu_count_mp
    cfs_quota_fname = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
    cfs_period_fname = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
    if os.path.exists(cfs_quota_fname) and os.path.exists(cfs_period_fname):
        with open(cfs_quota_fname, 'r') as fh:
            cfs_quota_us = int(fh.read())
        with open(cfs_period_fname, 'r') as fh:
            cfs_period_us = int(fh.read())

        if cfs_quota_us > 0 and cfs_period_us > 0:
            # Make sure this quantity is an int as math.ceil returns a
            # float in python2.7. (See issue #165)
            cpu_count_cfs = int(math.ceil(cfs_quota_us / cfs_period_us))

    # User-defined soft limit passed as a loky-specific environment variable.
    cpu_count_loky = int(os.environ.get('LOKY_MAX_CPU_COUNT', cpu_count_mp))

    aggregate_cpu_count = min(cpu_count_mp, cpu_count_affinity, cpu_count_cfs,
                              cpu_count_loky)
    return max(aggregate_cpu_count, 1)
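# The two cgroup files read above only exist on the legacy cgroup v1 hierarchy.
# Below is a minimal sketch of the equivalent check for cgroup v2, where the
# quota and period live together in a single "cpu.max" file; the helper name is
# illustrative and the path assumes the process sits at its cgroup root, as is
# typical inside containers.
import math
import os

def _cpu_quota_cgroup_v2(default):
    """Return a CPU cap derived from cgroup v2's cpu.max, or `default`."""
    cpu_max_fname = "/sys/fs/cgroup/cpu.max"
    if not os.path.exists(cpu_max_fname):
        return default
    with open(cpu_max_fname) as fh:
        quota, period = fh.read().split()   # e.g. "200000 100000" or "max 100000"
    if quota == "max":                      # "max" means no bandwidth limit
        return default
    return max(int(math.ceil(int(quota) / int(period))), 1)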
def _get_parallel_jobs():
    try:
        return max(len(os.sched_getaffinity(0)), 1)
    except Exception:
        pass
    try:
        return os.cpu_count() or 1
    except Exception:
        pass
    try:
        from multiprocessing import cpu_count
        return cpu_count() or 1
    except Exception:
        pass
    return 1
def _collect_system_metadata(metadata):
    metadata['platform'] = platform.platform(True, False)
    if sys.platform.startswith('linux'):
        _collect_linux_metadata(metadata)

    # CPU count
    cpu_count = None
    if hasattr(os, 'cpu_count'):  # Python 3.4
        cpu_count = os.cpu_count()
    else:
        try:
            import multiprocessing
        except ImportError:
            pass
        else:
            try:
                cpu_count = multiprocessing.cpu_count()
            except NotImplementedError:
                pass
    if cpu_count is not None and cpu_count >= 1:
        metadata['cpu_count'] = str(cpu_count)

    # CPU affinity
    if hasattr(os, 'sched_getaffinity'):
        cpus = os.sched_getaffinity(0)
    elif psutil is not None:
        proc = psutil.Process()
        if hasattr(proc, 'cpu_affinity'):
            cpus = proc.cpu_affinity()
        else:
            # cpu_affinity() is only available on Linux, Windows and FreeBSD
            cpus = None
    else:
        cpus = None
    if cpus is not None and cpu_count is not None and cpu_count >= 1:
        if cpus == set(range(cpu_count)):
            cpus = None
    if cpus:
        metadata['cpu_affinity'] = perf._format_cpu_list(cpus)

    # Hostname
    hostname = socket.gethostname()
    _add(metadata, 'hostname', hostname)
def cpu_count():
    """Return the number of CPUs the current process can use.

    The returned number of CPUs accounts for:
     * the number of CPUs in the system, as given by
       ``multiprocessing.cpu_count``
     * the CPU affinity settings of the current process
       (available with Python 3.4+ on some Unix systems)
     * CFS scheduler CPU bandwidth limit (available on Linux only)
    and is given as the minimum of these three constraints.
    It is also always larger or equal to 1.
    """
    import math

    try:
        cpu_count_mp = mp.cpu_count()
    except NotImplementedError:
        cpu_count_mp = 1

    # Number of available CPUs given affinity settings
    cpu_count_affinity = cpu_count_mp
    if hasattr(os, 'sched_getaffinity'):
        try:
            cpu_count_affinity = len(os.sched_getaffinity(0))
        except NotImplementedError:
            pass

    # CFS scheduler CPU bandwidth limit
    # available in Linux since 2.6 kernel
    cpu_count_cfs = cpu_count_mp
    cfs_quota_fname = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
    cfs_period_fname = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
    if os.path.exists(cfs_quota_fname) and os.path.exists(cfs_period_fname):
        with open(cfs_quota_fname, 'r') as fh:
            cfs_quota_us = int(fh.read())
        with open(cfs_period_fname, 'r') as fh:
            cfs_period_us = int(fh.read())

        if cfs_quota_us > 0 and cfs_period_us > 0:
            cpu_count_cfs = math.ceil(cfs_quota_us / cfs_period_us)
            cpu_count_cfs = max(cpu_count_cfs, 1)

    return min(cpu_count_mp, cpu_count_affinity, cpu_count_cfs)
def get_cpu_count():
    """
    Try to obtain the number of CPUs available to this process.

    @return: Number of CPUs or None if unable to obtain.
    """
    try:
        import os
        # This was introduced in Python 3.3 only, but exists in Linux
        # all the way back to the 2.5.8 kernel.
        # This is NOT available on FreeBSD!
        return len(os.sched_getaffinity(0))
    except (ImportError, NotImplementedError, AttributeError):
        pass
    try:
        import multiprocessing
        return multiprocessing.cpu_count()
    except (ImportError, NotImplementedError):
        return None
def available_cpu_cores(fallback: int = 1) -> int:
    """
    Returns the number (an int) of CPU cores available to this **process**, if
    determinable, otherwise the number of CPU cores available to the
    **computer**, if determinable, otherwise the *fallback* number (which
    defaults to 1).
    """
    try:
        # Note that this is the correct function to use, not os.cpu_count(), as
        # described in the latter's documentation.
        #
        # The reason, which the documentation does not detail, is that
        # processes may be pinned or restricted to certain CPUs by setting
        # their "affinity".  This is not typical except in high-performance
        # computing environments, but if it is done, then a computer with say
        # 24 total cores may only allow our process to use 12.  If we tried to
        # naively use all 24, we'd end up with two threads competing for each
        # of the 12 cores we can actually use.  This would degrade performance
        # rather than improve it!
        return len(os.sched_getaffinity(0))
    except AttributeError:
        # os.sched_getaffinity() is not available on all platforms;
        # cpu_count() returns None if the value is indeterminable.
        return os.cpu_count() or fallback
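# A short, Linux-only demonstration of why the distinction matters (assuming
# the function above is importable as `available_cpu_cores`): restricting the
# affinity mask changes what the process may use, but not os.cpu_count().
import os

if hasattr(os, "sched_setaffinity"):
    original = os.sched_getaffinity(0)
    try:
        os.sched_setaffinity(0, {min(original)})   # restrict to a single CPU
        print(os.cpu_count())                      # still the machine total
        print(len(os.sched_getaffinity(0)))        # 1 -> what we may actually use
        print(available_cpu_cores())               # also 1
    finally:
        os.sched_setaffinity(0, original)          # undo the restriction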
def main(args):
    log.info('Reading input file list %s', args.input)
    with open(args.input, 'r') as f:
        file_list = f.read().splitlines()

    log.info('Determining model sets')
    model_sets = defaultdict(dict)
    for fp in file_list:
        cf = Cmip5File(datanode_fp=fp)
        key = '{}_{}_{}'.format(cf.model, cf.experiment, cf.ensemble_member)
        model_sets[key][cf.variable_name] = fp

    # Set up job queues
    tasks = multiprocessing.Queue()
    results = multiprocessing.Queue()

    # Start workers
    num_workers = len(os.sched_getaffinity(0))
    log.info('Creating {} workers'.format(num_workers))
    workers = [Consumer(tasks, results) for i in range(num_workers)]
    for worker in workers:
        worker.start()

    for model_experiment_member, variable_set in model_sets.items():
        log.info('Queuing model set %s', model_experiment_member)
        tasks.put(Deriver(variable_set, args.outdir))

    # Add a poison pill for each worker
    for x in range(num_workers):
        tasks.put(None)

    num_jobs = len(model_sets)
    while num_jobs:
        result = results.get()
        num_jobs -= 1
        print(str(num_jobs) + ' Jobs left')
# Enables billing pages and plan-based feature gates. If False, all features
# are available to all realms.
BILLING_ENABLED = False

FREE_TRIAL_DAYS = get_secret('free_trial_days', None)

# Custom message (supports HTML) to be shown in the navbar of landing pages.
# Used mainly for making announcements.
LANDING_PAGE_NAVBAR_MESSAGE: Optional[str] = None

# Automatically catch up soft-deactivated users when running the
# `soft-deactivate-users` cron. Turn this off if the server has tens of
# thousands of users and you would like to save some disk space.
# Soft-deactivated returning users are still caught up normally.
AUTO_CATCH_UP_SOFT_DEACTIVATED_USERS = True

# Enables Google Analytics on selected portico pages.
GOOGLE_ANALYTICS_ID: Optional[str] = None

# This is overridden by dev_settings.py for droplets.
IS_DEV_DROPLET = False

# Used by puppet/zulip_ops/files/cron.d/check_send_receive_time.
NAGIOS_BOT_HOST = EXTERNAL_HOST

# Automatically deactivate users not found by the AUTH_LDAP_USER_SEARCH query.
LDAP_DEACTIVATE_NON_MATCHING_USERS: Optional[bool] = None

# Use half of the available CPUs for data import purposes.
DEFAULT_DATA_EXPORT_IMPORT_PARALLELISM = (len(os.sched_getaffinity(0)) // 2) or 1
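# The trailing `or 1` above matters on hosts where the process may only use a
# single CPU: 1 // 2 == 0, which is falsy, so the expression falls back to one
# worker. A tiny illustrative helper (not part of the settings file above):
import os

def half_the_usable_cpus() -> int:
    """Half of the CPUs this process may run on, but never fewer than one."""
    return (len(os.sched_getaffinity(0)) // 2) or 1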
def gnomAD_converter():
    if "--help" in input_args:
        print(
            "This is the VCF gnomAD converter provided to convert all gnomADv3.1 VCFs into CRISPRme supported VCFs"
        )
        print(
            "These are the flags that must be used in order to run this function:"
        )
        print(
            "\t--gnomAD_VCFdir, used to specify the directory containing gnomADv3.1 original VCFs"
        )
        print(
            "\t--samplesID, used to specify the pre-generated samplesID file necessary to introduce samples into gnomAD variants"
        )
        print(
            "\t--thread, used to specify the number of cores used to process VCFs in parallel (DEFAULT is ALL available minus 2) [OPTIONAL]"
        )
        exit(0)

    if "--gnomAD_VCFdir" not in input_args:
        print("--gnomAD_VCFdir not in input, MANDATORY TO CONVERT DATA")
        # vcf_dir = script_path+'vuota/'
        exit(1)
    else:
        try:
            vcf_dir = os.path.abspath(
                input_args[input_args.index("--gnomAD_VCFdir") + 1])
        except IndexError:
            print("Please input some parameter for flag --gnomAD_VCFdir")
            exit(1)
        if not os.path.isdir(vcf_dir):
            print("The folder specified for --gnomAD_VCFdir does not exist")
            exit(1)

    if "--thread" not in input_args:
        # print("--thread must be contained in the input")
        # exit(1)
        thread = len(os.sched_getaffinity(0)) - 2
    else:
        try:
            thread = input_args[input_args.index("--thread") + 1]
        except IndexError:
            print("Please input some parameter for flag --thread")
            exit(1)
        try:
            thread = int(thread)
        except ValueError:
            print("Please input a number for flag --thread")
            exit(1)
        if thread <= 0 or thread > len(os.sched_getaffinity(0)) - 2:
            print("thread is set to default (ALL available minus 2)")
            thread = len(os.sched_getaffinity(0)) - 2
            # exit(1)

    if "--samplesID" not in input_args:
        print("--samplesID not in input, MANDATORY TO CONVERT DATA")
        exit(1)
    elif "--samplesID" in input_args:
        try:
            samplefile = os.path.abspath(
                input_args[input_args.index("--samplesID") + 1])
        except IndexError:
            print("Please input some parameter for flag --samplesID")
            exit(1)
        if not os.path.isfile(samplefile):
            print("The file specified for --samplesID does not exist")
            exit(1)

    os.system(script_path + "./convert_gnomAD.py " + vcf_dir + " " +
              samplefile + " " + str(thread))
def _get_files(self, paths: List[str]) -> List[File]:
    pool = Pool(len(sched_getaffinity(0)))
    files = pool.map(self._get_file, paths)
    pool.close()
    pool.join()
    return files
def train_classifier(parameters=None,
                     ngram_range=(1, 1),
                     store=True,
                     lang="en_US",
                     n_jobs=None):
    """
    Train the intent classifier
    TODO auto invoke if sklearn version is new or first install or sth

    :param store: (bool) store classifier in clf.joblib
    """
    _LOGGER.info("Started training, parallelized with {} jobs".format(n_jobs))
    _LOGGER.info("Loading training set")
    training_set = load.training_set(lang)
    target_names = list(frozenset([i["unit"] for i in training_set]))

    _LOGGER.info("Preparing training set")
    if n_jobs is None:
        try:
            # Retrieve the number of CPUs that can be used
            n_jobs = len(os.sched_getaffinity(0))
        except AttributeError:
            # n_jobs stays None such that Pool will try to
            # automatically set the number of processes appropriately
            pass
    with multiprocessing.Pool(processes=n_jobs) as p:
        train_data = p.map(_clean_text_lang(lang),
                           [ex["text"] for ex in training_set])
    train_target = [
        target_names.index(example["unit"]) for example in training_set
    ]

    tfidf_model = TfidfVectorizer(
        sublinear_tf=True,
        ngram_range=ngram_range,
        stop_words=_get_classifier(lang).stop_words(),
    )

    _LOGGER.info("Fit TFIDF Model")
    matrix = tfidf_model.fit_transform(train_data)

    if parameters is None:
        parameters = {
            "loss": "log",
            "penalty": "l2",
            "tol": 1e-3,
            "n_jobs": n_jobs,
            "alpha": 0.0001,
            "fit_intercept": True,
            "random_state": 0,
        }

    _LOGGER.info("Fit SGD Classifier")
    clf = SGDClassifier(**parameters).fit(matrix, train_target)

    obj = {
        "scikit-learn_version":
        pkg_resources.get_distribution("scikit-learn").version,
        "tfidf_model": tfidf_model,
        "clf": clf,
        "target_names": target_names,
    }
    if store:  # pragma: no cover
        path = language.topdir(lang).joinpath("clf.joblib")
        _LOGGER.info("Store classifier at {}".format(path))
        with path.open("wb") as file:
            joblib.dump(obj, file)
    return obj
if args.method.lower() not in [
        'cne', 'acne', 'load_dfe', 'cv2e', 'sklearn', 'nmnet2', 'oanet2',
        'load_oanet', 'load_oanet_ransac'
]:
    raise ValueError('Unknown value for --method')

NUM_RUNS = 1
if args.split == 'test':
    NUM_RUNS = 1

params = {
    "maxiter": args.maxiter,
    "inl_th": args.inlier_th,
    "conf": args.conf,
    "match_th": args.match_th
}
problem = 'e'
OUT_DIR = get_output_dir(problem, args.split, args.method, params)
IN_DIR = os.path.join(args.data_dir, args.split)
if not os.path.isdir(OUT_DIR):
    os.makedirs(OUT_DIR)

num_cores = int(len(os.sched_getaffinity(0)) * 0.9)
for run in range(NUM_RUNS):
    seqs = sorted(os.listdir(IN_DIR))
    for seq in seqs:
        print(f'Working on {seq}')
        in_models_fname = os.path.join(
            OUT_DIR, f'submission_models_seq_{seq}_run_{run}.h5')
        in_inliers_fname = os.path.join(
            OUT_DIR, f'submission_inliers_seq_{seq}_run_{run}.h5')
        if args.method.lower() == 'load_dfe':
            in_inliers_fname = in_inliers_fname.replace('/e/', '/f/')
        out_models_fname = os.path.join(
            OUT_DIR, f'submission_upgraded_models_seq_{seq}_run_{run}.h5')
        out_inliers_fname = os.path.join(
            OUT_DIR, f'submission_upgraded_inliers_seq_{seq}_run_{run}.h5')
        if os.path.isfile(out_models_fname) and not args.force:
            print(f"Submission file {out_models_fname} already exists, skipping")
            continue
        inliers = load_h5(in_inliers_fname)
        models, inlier_masks = upgrade_E_submission(IN_DIR, inliers, seq,
def main(): import multiprocessing multiprocessing.set_start_method('forkserver') parser = argparse.ArgumentParser(description='Cats training.') parser.add_argument('--gpu', '-g', type=int, default=0, help='GPU ID (negative value indicates CPU)') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--normalization', type=str, choices=NORMALIZATIONS, required=True, help='Normalization method') args = parser.parse_args() gpu = args.gpu out_dir = args.out image_dir = 'images' batch_size = 32 short_edge = 256 crop_edge = 224 seed = 3141592653 n_processes = len(os.sched_getaffinity(0)) normalization = get_normalization(args.normalization) initial_lr = 0.1 epochs = 300 lr_reduce_interval = (100, 'epoch') lr_reduce_rate = 0.1 weight_decay = 5e-4 numpy_random = numpy.random.RandomState(seed) random = Random.from_numpy_random(numpy_random) train_dataset, valid_dataset, _ = CatsDataset.train_valid( image_dir, short_edge, crop_edge, random) order_sampler = iterators.ShuffleOrderSampler(numpy_random) train_iter = iterators.MultiprocessIterator(train_dataset, batch_size, repeat=True, shuffle=None, n_processes=n_processes, n_prefetch=4, order_sampler=order_sampler) valid_iter = iterators.MultiprocessIterator(valid_dataset, batch_size, repeat=False, shuffle=False, n_processes=n_processes, n_prefetch=4) numpy.random.seed(seed) model = ResNet50(len(CatsDataset.classes), normalization) model = chainer.links.Classifier(model) if gpu >= 0: chainer.cuda.get_device_from_id(gpu).use() model.to_gpu() optimizer = optimizers.MomentumSGD(lr=initial_lr) optimizer.setup(model) optimizer.add_hook(optimizer_hooks.WeightDecay(weight_decay)) updater = training.updaters.StandardUpdater(train_iter, optimizer, device=gpu) trainer = training.Trainer(updater, (epochs, 'epoch'), out=out_dir) trainer.extend(extensions.ExponentialShift('lr', lr_reduce_rate), trigger=lr_reduce_interval) trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu), trigger=(1, 'epoch')) trainer.extend(extensions.LogReport()) trainer.extend( extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ])) trainer.run() chainer.serializers.save_npz(os.path.join(out_dir, 'model.npz'), model)
def num_cpus():
    try:
        return len(os.sched_getaffinity(0))
    except AttributeError:
        return os.cpu_count()
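# On Python 3.13+ a similar affinity-aware count is available directly as
# os.process_cpu_count(). A hedged variant of num_cpus() that prefers it when
# present (the helper name below is illustrative, not from the source):
import os

def num_cpus_portable():
    if hasattr(os, "process_cpu_count"):   # Python 3.13+
        return os.process_cpu_count()
    try:
        return len(os.sched_getaffinity(0))
    except AttributeError:
        return os.cpu_count()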
def reset(cls):
    'assign default settings'

    # settings / optimizations
    cls.NUM_PROCESSES = len(os.sched_getaffinity(0))  # use multiple cores
    cls.TIMEOUT = np.inf  # verification timeout, in seconds (np.inf = no timeout)
    cls.SINGLE_SET = False  # only do single-set overapproximation (no splitting)

    cls.PRINT_OUTPUT = True  # print anything to stdout? (controls all output)

    cls.RESULT_SAVE_POLYS = False  # save 2-d projections of output polygons to Result.polys?
    cls.RESULT_SAVE_POLYS_DIMS = (0, 1)  # (x_dim, y_dim) of 2-d projections, used if RESULT_SAVE_POLYS is True
    cls.RESULT_SAVE_STARS = False  # save LpStar objects in result?
    cls.RESULT_SAVE_TIMERS = []  # list of timers to record in Result.timers; TIMING_STATS must be True
    cls.FIND_CONCRETE_COUNTEREXAMPLES = True  # should we try to find concrete counterexamples if spec violated?

    #########################
    ### advanced settings ###
    cls.PRINT_PROGRESS = True  # print periodic progress updates
    cls.PRINT_INTERVAL = 0.1  # print interval in seconds (0 = no printing)
    cls.TIMING_STATS = False  # compute and print detailed timing stats

    cls.CHECK_SINGLE_THREAD_BLAS = True

    cls.UPDATE_SHARED_VARS_INTERVAL = 0.05  # interval for each thread to update shared state

    cls.COMPRESS_INIT_BOX = True
    cls.EAGER_BOUNDS = True

    cls.CONTRACT_ZONOTOPE = False  # try domain contraction on zonotopes (more accurate prefilter, but slower)
    cls.CONTRACT_ZONOTOPE_LP = False  # contract zonotope using LPs (even more accurate prefilter, but even slower)
    cls.CONTRACT_OVERAPPROX_VIOLATION = False  # contract from overapproximation violation?

    # the types of overapproximation to use in each round
    cls.OVERAPPROX_TYPES = [['zono.area'],
                            ['zono.area', 'zono.ybloat', 'zono.interval'],
                            ['zono.area', 'zono.ybloat', 'zono.interval', 'star.lp']]

    cls.OVERAPPROX_NEAR_ROOT_MAX_SPLITS = 2
    cls.OVERAPPROX_TYPES_NEAR_ROOT = cls.OVERAPPROX_TYPES

    cls.OVERAPPROX_CONTRACT_ZONO_LP = True  # contract LP during overapproximation steps?
    cls.OVERAPPROX_GEN_LIMIT_MULTIPLIER = 1.5  # don't try approx star if multizono.gens > THIS * last_safe_gens
    cls.OVERAPPROX_MIN_GEN_LIMIT = 50  # minimum generators to use as cap
    cls.OVERAPPROX_LP_TIMEOUT = 1.0  # timeout for LP part of overapproximation, use np.inf for unbounded
    cls.OVERAPPROX_BOTH_BOUNDS = False  # should overapprox star method compute both bounds or just reject branches?
    cls.SAVE_BRANCH_TUPLES_FILENAME = None
    cls.SAVE_BRANCH_TUPLES_TIMES = True  # when saving branch tuples, also include runtimes
    cls.BRANCH_MODE = cls.BRANCH_OVERAPPROX
    cls.PRINT_BRANCH_TUPLES = False

    cls.TRY_QUICK_OVERAPPROX = True
    cls.QUICK_OVERAPPROX_TYPES = [['zono.area'],
                                  ['zono.area', 'zono.ybloat', 'zono.interval']]
    cls.PRINT_OVERAPPROX_OUTPUT = True  # print progress on first overapprox

    # one_norm is especially good at finding counterexamples
    cls.SPLIT_ORDER = cls.SPLIT_ONE_NORM  # rearrange splitting order within each layer

    cls.RESULT_SAVE_POLYS_EPSILON = 1e-7  # accuracy of vertices when projecting polygons for Kamenev method

    cls.OFFLOAD_CLOSEST_TO_ROOT = True  # when offloading work to other threads, use stars closest to root of search

    cls.SPLIT_TOLERANCE = 1e-8  # small outputs get rounded to zero when deciding if splitting is possible
    cls.TEST_FUNC_BEFORE_ASSIGNMENT = None  # function to call before eager assignment, used for unit testing

    cls.SPLIT_IF_IDLE = True  # force splitting (rather than overapproximation) if there are idle processes
    cls.SHUFFLE_TIME = None  # shuffle star sets after some time (improves unsafe specs)

    cls.GLPK_TIMEOUT = 60  # maximum allowed seconds for each individual LP run
    cls.GLPK_FIRST_PRIMAL = True  # first try primal LP... if that fails do dual
    cls.GLPK_RESET_BEFORE_MINIMIZE = False  # reset the lp basis before minimize

    cls.SKIP_COMPRESSED_CHECK = False  # sanity check for compressed inputs when COMPRESS_INIT_BOX is False

    ####
    cls.NUM_LP_PROCESSES = 1  # if > 1, then force multiprocessing during lp step
    cls.PARALLEL_ROOT_LP = True  # near the root of the search, use parallel lp, overrides NUM_LP_PROCESSES if true

    ####
    # generally it should be safe to add any linear layers to the whitelist
    cls.ONNX_WHITELIST = ['Add', 'AveragePool', 'Constant', 'Concat', 'Conv', 'Flatten', 'Gather',
                          'Gemm', 'MatMul', 'Mul', 'Reshape', 'Relu', 'Shape', 'Sub', 'Unsqueeze']
    cls.ONNX_BLACKLIST = ['Atan', 'MaxPool', 'Sigmoid', 'Tanh']  # unsupported nonlinear layers

    ###
    # settings for adversarial generation
    cls.ADVERSARIAL_TRY_QUICK = True  # if settings are provided, try quick adversarial generation at start
    cls.ADVERSARIAL_QUICK_NUM_ATTEMPTS = 10  # how many attempts
    cls.ADVERSARIAL_IN_WORKERS = True  # do lots of attempted adversarial generation until more work is produced
    cls.ADVERSARIAL_WORKERS_MAX_ITER = 100  # how many attempts workers should make
    cls.ADVERSARIAL_TEST_ABSTRACT_VIO = True  # try executing abstract violations
    cls.ADVERSARIAL_SEED_ABSTRACT_VIO = False  # try adversarial examples seeded from abstract violations

    cls.ADVERSARIAL_ONNX_PATH = None  # path to .onnx file with corresponding .onnx.pb file
    cls.ADVERSARIAL_EPSILON = None
    cls.ADVERSARIAL_ORIG_IMAGE = None
    cls.ADVERSARIAL_ORIG_LABEL = None
    cls.ADVERSARIAL_TARGET = None  # can optionally be set to specific class, default: any misclassification
        process_execution_local_enable_nailgun=bootstrap_options.process_execution_local_enable_nailgun,
        remote_cache_read=bootstrap_options.remote_cache_read,
        remote_cache_write=bootstrap_options.remote_cache_write,
        remote_cache_eager_fetch=bootstrap_options.remote_cache_eager_fetch,
        remote_store_initial_timeout=bootstrap_options.remote_store_initial_timeout,
        remote_store_timeout_multiplier=bootstrap_options.remote_store_timeout_multiplier,
        remote_store_maximum_timeout=bootstrap_options.remote_store_maximum_timeout,
    )


_CPU_COUNT = (
    len(os.sched_getaffinity(0)) if hasattr(os, "sched_getaffinity") else os.cpu_count()
)


DEFAULT_EXECUTION_OPTIONS = ExecutionOptions(
    remote_execution=False,
    remote_store_server=[],
    remote_store_thread_count=1,
    remote_execution_server=None,
    remote_store_chunk_bytes=1024 * 1024,
    remote_store_chunk_upload_timeout_seconds=60,
    remote_store_rpc_retries=2,
    remote_store_connection_limit=5,
    process_execution_local_parallelism=_CPU_COUNT,
    process_execution_remote_parallelism=128,
    process_execution_cache_namespace=None,
    process_execution_cleanup_local_dirs=True,
def main(argv=None): #%% Check argv if argv == None: argv = sys.argv start = time.time() ver = "1.7.4" date = 20201119 author = "Y. Morishita" print("\n{} ver{} {} {}".format(os.path.basename(argv[0]), ver, date, author), flush=True) print("{} {}".format(os.path.basename(argv[0]), ' '.join(argv[1:])), flush=True) ### For parallel processing global ifgdates2, geocdir, outdir, nlook, n_valid_thre, cycle, cmap_wrap #%% Set default geocdir = [] outdir = [] nlook = 1 radar_freq = 5.405e9 try: n_para = len(os.sched_getaffinity(0)) except: n_para = multi.cpu_count() cmap_wrap = SCM.romaO cycle = 3 n_valid_thre = 0.5 # q = multi.get_context('fork') q = multi.get_context('spawn') #%% Read options try: try: opts, args = getopt.getopt(argv[1:], "hi:o:n:", ["help", "freq=", "n_para="]) except getopt.error as msg: raise Usage(msg) for o, a in opts: if o == '-h' or o == '--help': print(__doc__) return 0 elif o == '-i': geocdir = a elif o == '-o': outdir = a elif o == '-n': nlook = int(a) elif o == '--freq': radar_freq = float(a) elif o == '--n_para': n_para = int(a) if not geocdir: raise Usage('No GEOC directory given, -d is not optional!') elif not os.path.isdir(geocdir): raise Usage('No {} dir exists!'.format(geocdir)) except Usage as err: print("\nERROR:", file=sys.stderr, end='') print(" " + str(err.msg), file=sys.stderr) print("\nFor help, use -h or --help.\n", file=sys.stderr) return 2 #%% Directory and file setting geocdir = os.path.abspath(geocdir) if not outdir: outdir = os.path.join(os.path.dirname(geocdir), 'GEOCml{}'.format(nlook)) if not os.path.exists(outdir): os.mkdir(outdir) mlipar = os.path.join(outdir, 'slc.mli.par') dempar = os.path.join(outdir, 'EQA.dem_par') no_unw_list = os.path.join(outdir, 'no_unw_list.txt') if os.path.exists(no_unw_list): os.remove(no_unw_list) bperp_file_in = os.path.join(geocdir, 'baselines') bperp_file_out = os.path.join(outdir, 'baselines') metadata_file = os.path.join(geocdir, 'metadata.txt') if os.path.exists(metadata_file): center_time = subp.check_output(['grep', 'center_time', metadata_file ]).decode().split('=')[1].strip() else: center_time = None #%% ENU for ENU in ['E', 'N', 'U']: print('\nCreate {}'.format(ENU + '.geo'), flush=True) enutif = glob.glob(os.path.join(geocdir, '*.geo.{}.tif'.format(ENU))) ### Download if not exist if len(enutif) == 0: print(' No *.geo.{}.tif found in {}'.format( ENU, os.path.basename(geocdir)), flush=True) continue else: enutif = enutif[0] ## first one ### Create float data = gdal.Open(enutif).ReadAsArray() data[data == 0] = np.nan if nlook != 1: ### Multilook data = tools_lib.multilook(data, nlook, nlook) outfile = os.path.join(outdir, ENU + '.geo') data.tofile(outfile) print(' {}.geo created'.format(ENU), flush=True) #%% mli print('\nCreate slc.mli', flush=True) mlitif = glob.glob(os.path.join(geocdir, '*.geo.mli.tif')) if len(mlitif) > 0: mlitif = mlitif[0] ## First one mli = np.float32(gdal.Open(mlitif).ReadAsArray()) mli[mli == 0] = np.nan if nlook != 1: ### Multilook mli = tools_lib.multilook(mli, nlook, nlook) mlifile = os.path.join(outdir, 'slc.mli') mli.tofile(mlifile) mlipngfile = mlifile + '.png' mli = np.log10(mli) vmin = np.nanpercentile(mli, 5) vmax = np.nanpercentile(mli, 95) plot_lib.make_im_png(mli, mlipngfile, 'gray', 'MLI (log10)', vmin, vmax, cbar=True) print(' slc.mli[.png] created', flush=True) else: print(' No *.geo.mli.tif found in {}'.format( os.path.basename(geocdir)), flush=True) #%% hgt print('\nCreate hgt', flush=True) hgttif = glob.glob(os.path.join(geocdir, '*.geo.hgt.tif')) if len(hgttif) 
> 0: hgttif = hgttif[0] ## First one hgt = np.float32(gdal.Open(hgttif).ReadAsArray()) hgt[hgt == 0] = np.nan if nlook != 1: ### Multilook hgt = tools_lib.multilook(hgt, nlook, nlook) hgtfile = os.path.join(outdir, 'hgt') hgt.tofile(hgtfile) hgtpngfile = hgtfile + '.png' vmax = np.nanpercentile(hgt, 99) vmin = -vmax / 3 ## bnecause 1/4 of terrain is blue plot_lib.make_im_png(hgt, hgtpngfile, 'terrain', 'DEM (m)', vmin, vmax, cbar=True) print(' hgt[.png] created', flush=True) else: print(' No *.geo.hgt.tif found in {}'.format( os.path.basename(geocdir)), flush=True) #%% tif -> float (with multilook/downsampling) print('\nCreate unw and cc', flush=True) ifgdates = tools_lib.get_ifgdates(geocdir) n_ifg = len(ifgdates) ### First check if float already exist ifgdates2 = [] for i, ifgd in enumerate(ifgdates): ifgdir1 = os.path.join(outdir, ifgd) unwfile = os.path.join(ifgdir1, ifgd + '.unw') ccfile = os.path.join(ifgdir1, ifgd + '.cc') if not (os.path.exists(unwfile) and os.path.exists(ccfile)): ifgdates2.append(ifgd) n_ifg2 = len(ifgdates2) if n_ifg - n_ifg2 > 0: print(" {0:3}/{1:3} unw and cc already exist. Skip".format( n_ifg - n_ifg2, n_ifg), flush=True) if n_ifg2 > 0: if n_para > n_ifg2: n_para = n_ifg2 ### Create float with parallel processing print(' {} parallel processing...'.format(n_para), flush=True) p = q.Pool(n_para) # rc = p.map(convert_wrapper, range(n_ifg2)) rc = p.map(convert_wrapper, [(i, ifgdates2, geocdir, outdir, nlook, n_valid_thre, cycle, cmap_wrap) for i in range(n_ifg2)]) p.close() ifgd_ok = [] for i, _rc in enumerate(rc): if _rc == 1: with open(no_unw_list, 'a') as f: print('{}'.format(ifgdates2[i]), file=f) elif _rc == 0: ifgd_ok = ifgdates2[i] ## readable tiff ### Read info ## If all float already exist, this will not be done, but no problem because ## par files should alerady exist! if ifgd_ok: unw_tiffile = os.path.join(geocdir, ifgd_ok, ifgd_ok + '.geo.unw.tif') geotiff = gdal.Open(unw_tiffile) width = geotiff.RasterXSize length = geotiff.RasterYSize lon_w_p, dlon, _, lat_n_p, _, dlat = geotiff.GetGeoTransform() ## lat lon are in pixel registration. 
dlat is negative lon_w_g = lon_w_p + dlon / 2 lat_n_g = lat_n_p + dlat / 2 ## to grit registration by shifting half pixel inside if nlook != 1: width = int(width / nlook) length = int(length / nlook) dlon = dlon * nlook dlat = dlat * nlook #%% EQA.dem_par, slc.mli.par if not os.path.exists(mlipar): print('\nCreate slc.mli.par', flush=True) # radar_freq = 5.405e9 ## fixed for Sentnel-1 with open(mlipar, 'w') as f: print('range_samples: {}'.format(width), file=f) print('azimuth_lines: {}'.format(length), file=f) print('radar_frequency: {} Hz'.format(radar_freq), file=f) if center_time is not None: print('center_time: {}'.format(center_time), file=f) if not os.path.exists(dempar): print('\nCreate EQA.dem_par', flush=True) text = [ "Gamma DIFF&GEO DEM/MAP parameter file", "title: DEM", "DEM_projection: EQA", "data_format: REAL*4", "DEM_hgt_offset: 0.00000", "DEM_scale: 1.00000", "width: {}".format(width), "nlines: {}".format(length), "corner_lat: {} decimal degrees".format(lat_n_g), "corner_lon: {} decimal degrees".format(lon_w_g), "post_lat: {} decimal degrees".format(dlat), "post_lon: {} decimal degrees".format(dlon), "", "ellipsoid_name: WGS 84", "ellipsoid_ra: 6378137.000 m", "ellipsoid_reciprocal_flattening: 298.2572236", "", "datum_name: WGS 1984", "datum_shift_dx: 0.000 m", "datum_shift_dy: 0.000 m", "datum_shift_dz: 0.000 m", "datum_scale_m: 0.00000e+00", "datum_rotation_alpha: 0.00000e+00 arc-sec", "datum_rotation_beta: 0.00000e+00 arc-sec", "datum_rotation_gamma: 0.00000e+00 arc-sec", "datum_country_list: Global Definition, WGS84, World\n" ] with open(dempar, 'w') as f: f.write('\n'.join(text)) #%% bperp print('\nCopy baselines file', flush=True) imdates = tools_lib.ifgdates2imdates(ifgdates) if os.path.exists(bperp_file_in): ## Check exisiting bperp_file if not io_lib.read_bperp_file(bperp_file_in, imdates): print(' baselines file found, but not complete. Make dummy', flush=True) io_lib.make_dummy_bperp(bperp_file_out, imdates) else: shutil.copyfile(bperp_file_in, bperp_file_out) else: print(' No valid baselines file exists. Make dummy.', flush=True) io_lib.make_dummy_bperp(bperp_file_out, imdates) #%% Finish elapsed_time = time.time() - start hour = int(elapsed_time / 3600) minite = int(np.mod((elapsed_time / 60), 60)) sec = int(np.mod(elapsed_time, 60)) print("\nElapsed time: {0:02}h {1:02}m {2:02}s".format(hour, minite, sec)) print('\n{} Successfully finished!!\n'.format(os.path.basename(argv[0]))) print('Output directory: {}\n'.format(os.path.relpath(outdir)))
def plot_depth(args: argparse.Namespace): """Plot depth with Matplotlib. This function is called by the main function. Argumenst --------- args : argparse.Namespace CMD argument parsed by `argparse`. Returns ------- Execution code. 0 for success. """ # process nprocs args.nprocs = len( os.sched_getaffinity(0)) if args.nprocs is None else args.nprocs # process case path args.case = pathlib.Path(args.case).expanduser().resolve() _misc.check_folder(args.case) # process level, frame_ed, topofilee, and dry_tol args = _misc.extract_info_from_setrun(args) # process args.soln_dir args.soln_dir = _misc.process_path(args.soln_dir, args.case, "_output") _misc.check_folder(args.soln_dir) # process args.dest_dir if args.use_sat: args.dest_dir = _misc.process_path( args.dest_dir, args.case, "_plots/sat/level{:02d}".format(args.level)) else: args.dest_dir = _misc.process_path( args.dest_dir, args.case, "_plots/depth/level{:02d}".format(args.level)) os.makedirs(args.dest_dir, exist_ok=True) # make sure the folder exists # process args.extent if args.extent is None: # get the minimum extent convering the solutions at all frames args.extent = _postprocessing.calc.get_soln_extent( args.soln_dir, args.frame_bg, args.frame_ed, args.level) # process the max of solution if args.cmax is None: args.cmax = _postprocessing.calc.get_soln_max(args.soln_dir, args.frame_bg, args.frame_ed, args.level) # prepare args for child processes (also initialize for the first proc) per_proc = (args.frame_ed - args.frame_bg) // args.nprocs # number of frames per porcess child_args = [copy.deepcopy(args)] child_args[0].frame_bg = args.frame_bg child_args[0].frame_ed = args.frame_bg + per_proc # the first process has to do more jobs ... child_args[0].frame_ed += (args.frame_ed - args.frame_bg) % args.nprocs # remaining processes for _ in range(args.nprocs - 1): child_args.append(copy.deepcopy(args)) child_args[-1].frame_bg = child_args[-2].frame_ed child_args[-1].frame_ed = child_args[-1].frame_bg + per_proc # if using satellite image as the background if args.use_sat: # download satellite image if necessarry with tempfile.TemporaryDirectory() as tempdir: sat_extent = _preprocessing.download_satellite_image( args.extent, pathlib.Path(tempdir).joinpath("sat_img.png")) sat_img = matplotlib.pyplot.imread( pathlib.Path(tempdir).joinpath("sat_img.png")) # change the function arguments for i in range(args.nprocs): child_args[i] = [ child_args[i], copy.deepcopy(sat_img), copy.deepcopy(sat_extent) ] # plot print("Spawning plotting tasks to {} processes: ".format(args.nprocs)) with multiprocessing.Pool( args.nprocs, lambda: print("PID {}".format(os.getpid()))) as pool: if args.use_sat: pool.starmap(plot_soln_frames_on_sat, child_args) else: pool.map(plot_soln_frames, child_args) return 0
def worker(worker_id, pool_id, pool_size, task_queue, result_queue, worker_queue, tasks_in_progress, cpu_affinity):
    """
    Put request token into queue
    Get task from task_queue
    Pop request from queue
    Put result into result_queue
    """
    start_file_logger('{}/block-{}/{}/worker_{}.log'.format(args.logdir, args.block_id, pool_id, worker_id),
                      worker_id,
                      name="worker_log",
                      level=logging.DEBUG if args.debug else logging.INFO)

    # Store worker ID as an environment variable
    os.environ['PARSL_WORKER_RANK'] = str(worker_id)
    os.environ['PARSL_WORKER_COUNT'] = str(pool_size)
    os.environ['PARSL_WORKER_POOL_ID'] = str(pool_id)
    os.environ['PARSL_WORKER_BLOCK_ID'] = str(args.block_id)

    # Sync worker with master
    logger.info('Worker {} started'.format(worker_id))
    if args.debug:
        logger.debug("Debug logging enabled")

    # If desired, set process affinity
    if cpu_affinity != "none":
        # Count the number of cores per worker
        avail_cores = sorted(os.sched_getaffinity(0))  # Get the available processors
        cores_per_worker = len(avail_cores) // pool_size
        assert cores_per_worker > 0, "Affinity does not work if there are more workers than cores"

        # Determine this worker's cores
        if cpu_affinity == "block":
            my_cores = avail_cores[cores_per_worker * worker_id:cores_per_worker * (worker_id + 1)]
        elif cpu_affinity == "alternating":
            my_cores = avail_cores[worker_id::pool_size]
        else:
            raise ValueError("Affinity strategy {} is not supported".format(cpu_affinity))

        # Set the affinity for this worker
        os.sched_setaffinity(0, my_cores)
        logger.info("Set worker CPU affinity to {}".format(my_cores))

    while True:
        worker_queue.put(worker_id)

        # The worker will receive {'task_id':<tid>, 'buffer':<buf>}
        req = task_queue.get()
        tasks_in_progress[worker_id] = req
        tid = req['task_id']
        logger.info("Received task {}".format(tid))

        try:
            worker_queue.get()
        except queue.Empty:
            logger.warning("Worker ID: {} failed to remove itself from ready_worker_queue".format(worker_id))
            pass

        try:
            result = execute_task(req['buffer'])
            serialized_result = serialize(result, buffer_threshold=1e6)
        except Exception as e:
            logger.info('Caught an exception: {}'.format(e))
            result_package = {'task_id': tid, 'exception': serialize(RemoteExceptionWrapper(*sys.exc_info()))}
        else:
            result_package = {'task_id': tid, 'result': serialized_result}
            # logger.debug("Result: {}".format(result))

        logger.info("Completed task {}".format(tid))
        try:
            pkl_package = pickle.dumps(result_package)
        except Exception:
            logger.exception("Caught exception while trying to pickle the result package")
            pkl_package = pickle.dumps({'task_id': tid,
                                        'exception': serialize(RemoteExceptionWrapper(*sys.exc_info()))})

        result_queue.put(pkl_package)
        tasks_in_progress.pop(worker_id)
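# A small, self-contained illustration of the two pinning strategies above
# ("block" vs "alternating"), using a made-up machine with 8 usable cores and
# 4 workers; the values are illustrative only.
avail_cores = list(range(8))   # pretend os.sched_getaffinity(0) returned {0..7}
pool_size = 4
cores_per_worker = len(avail_cores) // pool_size  # 2

for worker_id in range(pool_size):
    block = avail_cores[cores_per_worker * worker_id:cores_per_worker * (worker_id + 1)]
    alternating = avail_cores[worker_id::pool_size]
    print(worker_id, block, alternating)
# worker 0 -> block [0, 1], alternating [0, 4]
# worker 1 -> block [2, 3], alternating [1, 5], and so on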
def main(argv=None): #%% Check argv if argv == None: argv = sys.argv start = time.time() ver = "1.4.6" date = 20201119 author = "Y. Morishita" print("\n{} ver{} {} {}".format(os.path.basename(argv[0]), ver, date, author), flush=True) print("{} {}".format(os.path.basename(argv[0]), ' '.join(argv[1:])), flush=True) ## For parallel processing global n_para_gap, G, Aloop, unwpatch, imdates, incdir, ifgdir, length, width,\ coef_r2m, ifgdates, ref_unw, cycle, keep_incfile, resdir, restxtfile, \ cmap_vel, cmap_wrap, wavelength #%% Set default ifgdir = [] tsadir = [] inv_alg = 'LS' try: n_para = len(os.sched_getaffinity(0)) except: n_para = multi.cpu_count() n_para_inv = 1 memory_size = 4000 gamma = 0.0001 n_unw_r_thre = [] keep_incfile = False cmap_vel = SCM.roma.reversed() cmap_noise = 'viridis' cmap_noise_r = 'viridis_r' cmap_wrap = SCM.romaO q = multi.get_context('fork') #%% Read options try: try: opts, args = getopt.getopt(argv[1:], "hd:t:", [ "help", "mem_size=", "gamma=", "n_unw_r_thre=", "keep_incfile", "inv_alg=", "n_para=" ]) except getopt.error as msg: raise Usage(msg) for o, a in opts: if o == '-h' or o == '--help': print(__doc__) return 0 elif o == '-d': ifgdir = a elif o == '-t': tsadir = a elif o == '--mem_size': memory_size = float(a) elif o == '--gamma': gamma = float(a) elif o == '--n_unw_r_thre': n_unw_r_thre = float(a) elif o == '--keep_incfile': keep_incfile = True elif o == '--inv_alg': inv_alg = a elif o == '--n_para': n_para = int(a) if not ifgdir: raise Usage('No data directory given, -d is not optional!') elif not os.path.isdir(ifgdir): raise Usage('No {} dir exists!'.format(ifgdir)) elif not os.path.exists(os.path.join(ifgdir, 'slc.mli.par')): raise Usage('No slc.mli.par file exists in {}!'.format(ifgdir)) except Usage as err: print("\nERROR:", file=sys.stderr, end='') print(" " + str(err.msg), file=sys.stderr) print("\nFor help, use -h or --help.\n", file=sys.stderr) return 2 #%% Directory settings ifgdir = os.path.abspath(ifgdir) if not tsadir: tsadir = os.path.join(os.path.dirname(ifgdir), 'TS_' + os.path.basename(ifgdir)) if not os.path.isdir(tsadir): print('\nNo {} exists!'.format(tsadir), file=sys.stderr) return 1 tsadir = os.path.abspath(tsadir) resultsdir = os.path.join(tsadir, 'results') infodir = os.path.join(tsadir, 'info') netdir = os.path.join(tsadir, 'network') bad_ifg11file = os.path.join(infodir, '11bad_ifg.txt') bad_ifg12file = os.path.join(infodir, '12bad_ifg.txt') reffile = os.path.join(infodir, '12ref.txt') if not os.path.exists(reffile): ## for old LiCSBAS12 < v1.1 reffile = os.path.join(infodir, 'ref.txt') incdir = os.path.join(tsadir, '13increment') if not os.path.exists(incdir): os.mkdir(incdir) resdir = os.path.join(tsadir, '13resid') if not os.path.exists(resdir): os.mkdir(resdir) restxtfile = os.path.join(infodir, '13resid.txt') cumh5file = os.path.join(tsadir, 'cum.h5') #%% Check files try: if not os.path.exists(bad_ifg11file): raise Usage('No 11bad_ifg.txt file exists in {}!'.format(infodir)) if not os.path.exists(bad_ifg12file): raise Usage('No 12bad_ifg.txt file exists in {}!'.format(infodir)) if not os.path.exists(reffile): raise Usage('No 12ref.txt file exists in {}!'.format(infodir)) except Usage as err: print("\nERROR:", file=sys.stderr, end='') print(" " + str(err.msg), file=sys.stderr) print("\nFor help, use -h or --help.\n", file=sys.stderr) return 2 #%% Set preliminaly reference with open(reffile, "r") as f: refarea = f.read().split()[0] #str, x1/x2/y1/y2 refx1, refx2, refy1, refy2 = [int(s) for s in re.split('[:/]', refarea)] #%% 
Read data information ### Get size mlipar = os.path.join(ifgdir, 'slc.mli.par') width = int(io_lib.get_param_par(mlipar, 'range_samples')) length = int(io_lib.get_param_par(mlipar, 'azimuth_lines')) speed_of_light = 299792458 #m/s radar_frequency = float(io_lib.get_param_par(mlipar, 'radar_frequency')) #Hz wavelength = speed_of_light / radar_frequency #meter coef_r2m = -wavelength / 4 / np.pi * 1000 #rad -> mm, positive is -LOS ### Calc pixel spacing depending on IFG or GEOC, used in later spatial filter dempar = os.path.join(ifgdir, 'EQA.dem_par') width_geo = int(io_lib.get_param_par(dempar, 'width')) length_geo = int(io_lib.get_param_par(dempar, 'nlines')) dlat = float(io_lib.get_param_par(dempar, 'post_lat')) #negative dlon = float(io_lib.get_param_par(dempar, 'post_lon')) #positive lat1 = float(io_lib.get_param_par(dempar, 'corner_lat')) lon1 = float(io_lib.get_param_par(dempar, 'corner_lon')) if width == width_geo and length == length_geo: ## Geocoded print('In geographical coordinates', flush=True) centerlat = lat1 + dlat * (length / 2) ra = float(io_lib.get_param_par(dempar, 'ellipsoid_ra')) recip_f = float( io_lib.get_param_par(dempar, 'ellipsoid_reciprocal_flattening')) rb = ra * (1 - 1 / recip_f) ## polar radius pixsp_a = 2 * np.pi * rb / 360 * abs(dlat) pixsp_r = 2 * np.pi * ra / 360 * dlon * np.cos(np.deg2rad(centerlat)) else: print('In radar coordinates', flush=True) pixsp_r_org = float(io_lib.get_param_par(mlipar, 'range_pixel_spacing')) pixsp_a = float(io_lib.get_param_par(mlipar, 'azimuth_pixel_spacing')) inc_agl = float(io_lib.get_param_par(mlipar, 'incidence_angle')) pixsp_r = pixsp_r_org / np.sin(np.deg2rad(inc_agl)) ### Set n_unw_r_thre and cycle depending on L- or C-band if wavelength > 0.2: ## L-band if not n_unw_r_thre: n_unw_r_thre = 0.5 cycle = 1.5 # 2pi/cycle for comparison png elif wavelength <= 0.2: ## C-band if not n_unw_r_thre: n_unw_r_thre = 1.0 cycle = 3 # 3*2pi/cycle for comparison png #%% Read date and network information ### Get all ifgdates in ifgdir ifgdates_all = tools_lib.get_ifgdates(ifgdir) imdates_all = tools_lib.ifgdates2imdates(ifgdates_all) n_im_all = len(imdates_all) n_ifg_all = len(ifgdates_all) ### Read bad_ifg11 and 12 bad_ifg11 = io_lib.read_ifg_list(bad_ifg11file) bad_ifg12 = io_lib.read_ifg_list(bad_ifg12file) bad_ifg_all = list(set(bad_ifg11 + bad_ifg12)) bad_ifg_all.sort() ### Remove bad ifgs and images from list ifgdates = list(set(ifgdates_all) - set(bad_ifg_all)) ifgdates.sort() imdates = tools_lib.ifgdates2imdates(ifgdates) n_ifg = len(ifgdates) n_ifg_bad = len(set(bad_ifg11 + bad_ifg12)) n_im = len(imdates) n_unw_thre = int(n_unw_r_thre * n_im) ### Make 13used_image.txt imfile = os.path.join(infodir, '13used_image.txt') with open(imfile, 'w') as f: for i in imdates: print('{}'.format(i), file=f) ### Calc dt in year imdates_dt = ([ dt.datetime.strptime(imd, '%Y%m%d').toordinal() for imd in imdates ]) dt_cum = np.float32((np.array(imdates_dt) - imdates_dt[0]) / 365.25) ### Construct G and Aloop matrix for increment and n_gap G = inv_lib.make_sb_matrix(ifgdates) Aloop = loop_lib.make_loop_matrix(ifgdates) #%% Plot network ## Read bperp data or dummy bperp_file = os.path.join(ifgdir, 'baselines') if os.path.exists(bperp_file): bperp_all = io_lib.read_bperp_file(bperp_file, imdates_all) bperp = io_lib.read_bperp_file(bperp_file, imdates) else: #dummy bperp_all = np.random.random(len(imdates_all)).tolist() bperp = np.random.random(n_im).tolist() pngfile = os.path.join(netdir, 'network13_all.png') plot_lib.plot_network(ifgdates_all, 
bperp_all, [], pngfile) pngfile = os.path.join(netdir, 'network13.png') plot_lib.plot_network(ifgdates_all, bperp_all, bad_ifg_all, pngfile) pngfile = os.path.join(netdir, 'network13_nobad.png') plot_lib.plot_network(ifgdates_all, bperp_all, bad_ifg_all, pngfile, plot_bad=False) #%% Get patch row number if inv_alg == 'WLS': n_store_data = n_ifg * 3 + n_im * 2 + n_im * 0.3 # else: n_store_data = n_ifg * 2 + n_im * 2 + n_im * 0.3 #not sure n_patch, patchrow = tools_lib.get_patchrow(width, length, n_store_data, memory_size) #%% Display and output settings & parameters print('') print('Size of image (w,l) : {}, {}'.format(width, length)) print('# of all images : {}'.format(n_im_all)) print('# of images to be used : {}'.format(n_im)) print('# of all ifgs : {}'.format(n_ifg_all)) print('# of ifgs to be used : {}'.format(n_ifg)) print('# of removed ifgs : {}'.format(n_ifg_bad)) print('Threshold of used unw : {}'.format(n_unw_thre)) print('') print('Reference area (X/Y) : {}:{}/{}:{}'.format( refx1, refx2, refy1, refy2)) print('Allowed memory size : {} MB'.format(memory_size)) print('Number of patches : {}'.format(n_patch)) print('Inversion algorism : {}'.format(inv_alg)) print('Gamma value : {}'.format(gamma), flush=True) with open(os.path.join(infodir, '13parameters.txt'), "w") as f: print('range_samples: {}'.format(width), file=f) print('azimuth_lines: {}'.format(length), file=f) print('wavelength: {}'.format(wavelength), file=f) print('n_im_all: {}'.format(n_im_all), file=f) print('n_im: {}'.format(n_im), file=f) print('n_ifg_all: {}'.format(n_ifg_all), file=f) print('n_ifg: {}'.format(n_ifg), file=f) print('n_ifg_bad: {}'.format(n_ifg_bad), file=f) print('n_unw_thre: {}'.format(n_unw_thre), file=f) print('ref_area: {}:{}/{}:{}'.format(refx1, refx2, refy1, refy2), file=f) print('memory_size: {} MB'.format(memory_size), file=f) print('n_patch: {}'.format(n_patch), file=f) print('inv_alg: {}'.format(inv_alg), file=f) print('gamma: {}'.format(gamma), file=f) print('pixel_spacing_r: {:.2f} m'.format(pixsp_r), file=f) print('pixel_spacing_a: {:.2f} m'.format(pixsp_a), file=f) #%% Ref phase for inversion lengththis = refy2 - refy1 countf = width * refy1 countl = width * lengththis # Number to be read ref_unw = [] for i, ifgd in enumerate(ifgdates): unwfile = os.path.join(ifgdir, ifgd, ifgd + '.unw') f = open(unwfile, 'rb') f.seek(countf * 4, os.SEEK_SET) #Seek for >=2nd path, 4 means byte ### Read unw data (mm) at ref area unw = np.fromfile(f, dtype=np.float32, count=countl).reshape( (lengththis, width))[:, refx1:refx2] * coef_r2m unw[unw == 0] = np.nan if np.all(np.isnan(unw)): print('All nan in ref area in {}.'.format(ifgd)) print('Rerun LiCSBAS12.') return 1 ref_unw.append(np.nanmean(unw)) f.close() #%% Open cum.h5 for output if os.path.exists(cumh5file): os.remove(cumh5file) cumh5 = h5.File(cumh5file, 'w') cumh5.create_dataset('imdates', data=[np.int32(imd) for imd in imdates]) if not np.all(np.abs(np.array(bperp)) <= 1): # if not dummy cumh5.create_dataset('bperp', data=bperp) cum = cumh5.require_dataset('cum', (n_im, length, width), dtype=np.float32) vel = cumh5.require_dataset('vel', (length, width), dtype=np.float32) vconst = cumh5.require_dataset('vintercept', (length, width), dtype=np.float32) gap = cumh5.require_dataset('gap', (n_im - 1, length, width), dtype=np.int8) if width == width_geo and length == length_geo: ## if geocoded cumh5.create_dataset('corner_lat', data=lat1) cumh5.create_dataset('corner_lon', data=lon1) cumh5.create_dataset('post_lat', data=dlat) 
cumh5.create_dataset('post_lon', data=dlon) #%% For each patch for i_patch, rows in enumerate(patchrow): print('\nProcess {0}/{1}th line ({2}/{3}th patch)...'.format( rows[1], patchrow[-1][-1], i_patch + 1, n_patch), flush=True) start2 = time.time() #%% Read data ### Allocate memory lengththis = rows[1] - rows[0] n_pt_all = lengththis * width unwpatch = np.zeros((n_ifg, lengththis, width), dtype=np.float32) if inv_alg == 'WLS': cohpatch = np.zeros((n_ifg, lengththis, width), dtype=np.float32) ### For each ifg print(" Reading {0} ifg's unw data...".format(n_ifg), flush=True) countf = width * rows[0] countl = width * lengththis for i, ifgd in enumerate(ifgdates): unwfile = os.path.join(ifgdir, ifgd, ifgd + '.unw') f = open(unwfile, 'rb') f.seek(countf * 4, os.SEEK_SET) #Seek for >=2nd patch, 4 means byte ### Read unw data (mm) at patch area unw = np.fromfile(f, dtype=np.float32, count=countl).reshape( (lengththis, width)) * coef_r2m unw[unw == 0] = np.nan # Fill 0 with nan unw = unw - ref_unw[i] unwpatch[i] = unw f.close() ### Read coh file at patch area for WLS if inv_alg == 'WLS': cohfile = os.path.join(ifgdir, ifgd, ifgd + '.cc') f = open(cohfile, 'rb') if os.path.getsize(cohfile) == length * width: ## uint8 format f.seek(countf, os.SEEK_SET) #Seek for >=2nd patch cohpatch[i, :, :] = (np.fromfile( f, dtype=np.uint8, count=countl).reshape( (lengththis, width))).astype(np.float32) / 255 else: ## old float32 format f.seek(countf * 4, os.SEEK_SET) #Seek for >=2nd patch, 4 means byte cohpatch[i, :, :] = np.fromfile(f, dtype=np.float32, count=countl).reshape( (lengththis, width)) cohpatch[cohpatch == 0] = np.nan unwpatch = unwpatch.reshape( (n_ifg, n_pt_all)).transpose() #(n_pt_all, n_ifg) ### Calc variance from coherence for WLS if inv_alg == 'WLS': cohpatch = cohpatch.reshape( (n_ifg, n_pt_all)).transpose() #(n_pt_all, n_ifg) cohpatch[ cohpatch < 0.01] = 0.01 ## because negative value possible due to geocode cohpatch[ cohpatch > 0.99] = 0.99 ## because >1 possible due to geocode varpatch = (1 - cohpatch**2) / (2 * cohpatch**2) del cohpatch #%% Remove points with less valid data than n_unw_thre ix_unnan_pt = np.where( np.sum(~np.isnan(unwpatch), axis=1) > n_unw_thre)[0] n_pt_unnan = len(ix_unnan_pt) unwpatch = unwpatch[ix_unnan_pt, :] ## keep only unnan data if inv_alg == 'WLS': varpatch = varpatch[ix_unnan_pt, :] ## keep only unnan data print(' {}/{} points removed due to not enough ifg data...'.format( n_pt_all - n_pt_unnan, n_pt_all), flush=True) #%% Compute number of gaps, ifg_noloop, maxTlen point-by-point if n_pt_unnan != 0: ns_gap_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan gap_patch = np.zeros((n_im - 1, n_pt_all), dtype=np.int8) ns_ifg_noloop_patch = np.zeros( (n_pt_all), dtype=np.float32) * np.nan maxTlen_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan ### Determine n_para n_pt_patch_min = 1000 if n_pt_patch_min * n_para > n_pt_unnan: ## Too much n_para n_para_gap = int(np.floor(n_pt_unnan / n_pt_patch_min)) if n_para_gap == 0: n_para_gap = 1 else: n_para_gap = n_para print('\n Identifing gaps, and counting n_gap and n_ifg_noloop,') print(' with {} parallel processing...'.format(n_para_gap), flush=True) ### Devide unwpatch by n_para for parallel processing p = q.Pool(n_para_gap) _result = np.array(p.map(count_gaps_wrapper, range(n_para_gap)), dtype=object) p.close() ns_gap_patch[ix_unnan_pt] = np.hstack(_result[:, 0]) #n_pt gap_patch[:, ix_unnan_pt] = np.hstack(_result[:, 1]) #n_im-1, n_pt ns_ifg_noloop_patch[ix_unnan_pt] = np.hstack(_result[:, 2]) ### maxTlen 
_maxTlen = np.zeros((n_pt_unnan), dtype=np.float32) #temporaly _Tlen = np.zeros((n_pt_unnan), dtype=np.float32) #temporaly for imx in range(n_im - 1): _Tlen = _Tlen + (dt_cum[imx + 1] - dt_cum[imx]) ## Adding dt _Tlen[gap_patch[imx, ix_unnan_pt] == 1] = 0 ## reset to 0 if gap _maxTlen[_maxTlen < _Tlen] = _Tlen[ _maxTlen < _Tlen] ## Set Tlen to maxTlen maxTlen_patch[ix_unnan_pt] = _maxTlen #%% Time series inversion print('\n Small Baseline inversion by {}...\n'.format(inv_alg), flush=True) if inv_alg == 'WLS': inc_tmp, vel_tmp, vconst_tmp = inv_lib.invert_nsbas_wls( unwpatch, varpatch, G, dt_cum, gamma, n_para_inv) else: inc_tmp, vel_tmp, vconst_tmp = inv_lib.invert_nsbas( unwpatch, G, dt_cum, gamma, n_para_inv) ### Set to valuables inc_patch = np.zeros( (n_im - 1, n_pt_all), dtype=np.float32) * np.nan vel_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan vconst_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan inc_patch[:, ix_unnan_pt] = inc_tmp vel_patch[ix_unnan_pt] = vel_tmp vconst_patch[ix_unnan_pt] = vconst_tmp ### Calculate residuals res_patch = np.zeros((n_ifg, n_pt_all), dtype=np.float32) * np.nan res_patch[:, ix_unnan_pt] = unwpatch.T - np.dot(G, inc_tmp) res_sumsq = np.nansum(res_patch**2, axis=0) res_n = np.float32((~np.isnan(res_patch)).sum(axis=0)) res_n[res_n == 0] = np.nan # To avoid 0 division res_rms_patch = np.sqrt(res_sumsq / res_n) ### Cumulative displacememt cum_patch = np.zeros((n_im, n_pt_all), dtype=np.float32) * np.nan cum_patch[1:, :] = np.cumsum(inc_patch, axis=0) ## Fill 1st image with 0 at unnan points from 2nd images bool_unnan_pt = ~np.isnan(cum_patch[1, :]) cum_patch[0, bool_unnan_pt] = 0 ## Drop (fill with nan) interpolated cum by 2 continuous gaps for i in range(n_im - 2): ## from 1->n_im-1 gap2 = gap_patch[i, :] + gap_patch[i + 1, :] bool_gap2 = (gap2 == 2 ) ## true if 2 continuous gaps for each point cum_patch[i + 1, :][bool_gap2] = np.nan ## Last (n_im th) image. 1 gap means interpolated cum_patch[-1, :][gap_patch[-1, :] == 1] = np.nan #%% Fill by np.nan if n_pt_unnan == 0 else: cum_patch = np.zeros((n_im, n_pt_all), dtype=np.float32) * np.nan vel_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan vconst_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan gap_patch = np.zeros((n_im - 1, n_pt_all), dtype=np.int8) inc_patch = np.zeros( (n_im - 1, n_pt_all), dtype=np.float32) * np.nan res_patch = np.zeros((n_ifg, n_pt_all), dtype=np.float32) * np.nan res_rms_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan ns_gap_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan ns_ifg_noloop_patch = np.zeros( (n_pt_all), dtype=np.float32) * np.nan maxTlen_patch = np.zeros((n_pt_all), dtype=np.float32) * np.nan #%% Output data and image ### cum.h5 file cum[:, rows[0]:rows[1], :] = cum_patch.reshape( (n_im, lengththis, width)) vel[rows[0]:rows[1], :] = vel_patch.reshape((lengththis, width)) vconst[rows[0]:rows[1], :] = vconst_patch.reshape((lengththis, width)) gap[:, rows[0]:rows[1], :] = gap_patch.reshape( (n_im - 1, lengththis, width)) ### Others openmode = 'w' if rows[0] == 0 else 'a' #w only 1st patch ## For each imd. cum and inc for imx, imd in enumerate(imdates): ## Incremental displacement if imd == imdates[-1]: continue #skip last incfile = os.path.join(incdir, '{0}_{1}.inc'.format(imd, imdates[imx + 1])) with open(incfile, openmode) as f: inc_patch[imx, :].tofile(f) ## For each ifgd. 
resid for i, ifgd in enumerate(ifgdates): resfile = os.path.join(resdir, '{0}.res'.format(ifgd)) with open(resfile, openmode) as f: res_patch[i, :].tofile(f) ## velocity and noise indecies in results dir names = [ 'vel', 'vintercept', 'resid_rms', 'n_gap', 'n_ifg_noloop', 'maxTlen' ] data = [ vel_patch, vconst_patch, res_rms_patch, ns_gap_patch, ns_ifg_noloop_patch, maxTlen_patch ] for i in range(len(names)): file = os.path.join(resultsdir, names[i]) with open(file, openmode) as f: data[i].tofile(f) #%% Finish patch elapsed_time2 = int(time.time() - start2) hour2 = int(elapsed_time2 / 3600) minite2 = int(np.mod((elapsed_time2 / 60), 60)) sec2 = int(np.mod(elapsed_time2, 60)) print(" Elapsed time for {0}th patch: {1:02}h {2:02}m {3:02}s".format( i_patch + 1, hour2, minite2, sec2), flush=True) #%% Find stable ref point print('\nFind stable reference point...', flush=True) ### Compute RMS of time series with reference to all points sumsq_cum_wrt_med = np.zeros((length, width), dtype=np.float32) for i in range(n_im): sumsq_cum_wrt_med = sumsq_cum_wrt_med + (cum[i, :, :] - np.nanmedian(cum[i, :, :]))**2 rms_cum_wrt_med = np.sqrt(sumsq_cum_wrt_med / n_im) ### Mask by minimum n_gap n_gap = io_lib.read_img(os.path.join(resultsdir, 'n_gap'), length, width) min_n_gap = np.nanmin(n_gap) mask_n_gap = np.float32(n_gap == min_n_gap) mask_n_gap[mask_n_gap == 0] = np.nan rms_cum_wrt_med = rms_cum_wrt_med * mask_n_gap ### Find stable reference min_rms = np.nanmin(rms_cum_wrt_med) refy1s, refx1s = np.where(rms_cum_wrt_med == min_rms) refy1s, refx1s = refy1s[0], refx1s[0] ## Only first index refy2s, refx2s = refy1s + 1, refx1s + 1 print('Selected ref: {}:{}/{}:{}'.format(refx1s, refx2s, refy1s, refy2s), flush=True) ### Rerferencing cumulative displacement and vel to new stable ref for i in range(n_im): cum[i, :, :] = cum[i, :, :] - cum[i, refy1s, refx1s] vel = vel - vel[refy1s, refx1s] vconst = vconst - vconst[refy1s, refx1s] ### Save image rms_cum_wrt_med_file = os.path.join(infodir, '13rms_cum_wrt_med') with open(rms_cum_wrt_med_file, 'w') as f: rms_cum_wrt_med.tofile(f) pngfile = os.path.join(infodir, '13rms_cum_wrt_med.png') plot_lib.make_im_png(rms_cum_wrt_med, pngfile, cmap_noise_r, 'RMS of cum wrt median (mm)', np.nanpercentile(rms_cum_wrt_med, 1), np.nanpercentile(rms_cum_wrt_med, 99)) ### Save ref cumh5.create_dataset('refarea', data='{}:{}/{}:{}'.format(refx1s, refx2s, refy1s, refy2s)) refsfile = os.path.join(infodir, '13ref.txt') with open(refsfile, 'w') as f: print('{}:{}/{}:{}'.format(refx1s, refx2s, refy1s, refy2s), file=f) if width == width_geo and length == length_geo: ## Geocoded ### Make ref_stable.kml reflat = lat1 + dlat * refy1s reflon = lon1 + dlon * refx1s io_lib.make_point_kml(reflat, reflon, os.path.join(infodir, '13ref.kml')) #%% Close h5 file cumh5.close() #%% Output png images ### Incremental displacement _n_para = n_im - 1 if n_para > n_im - 1 else n_para print( '\nOutput increment png images with {} parallel processing...'.format( _n_para), flush=True) p = q.Pool(_n_para) p.map(inc_png_wrapper, range(n_im - 1)) p.close() ### Residual for each ifg. png and txt. 
with open(restxtfile, "w") as f: print('# RMS of residual (mm)', file=f) _n_para = n_ifg if n_para > n_ifg else n_para print('\nOutput residual png images with {} parallel processing...'.format( _n_para), flush=True) p = q.Pool(_n_para) p.map(resid_png_wrapper, range(n_ifg)) p.close() ### Velocity and noise indices cmins = [None, None, None, None, None, None] cmaxs = [None, None, None, None, None, None] cmaps = [ cmap_vel, cmap_vel, cmap_noise_r, cmap_noise_r, cmap_noise_r, cmap_noise ] titles = [ 'Velocity (mm/yr)', 'Intercept of velocity (mm)', 'RMS of residual (mm)', 'Number of gaps in SB network', 'Number of ifgs with no loops', 'Max length of connected SB network (yr)' ] print('\nOutput noise png images...', flush=True) for i in range(len(names)): file = os.path.join(resultsdir, names[i]) data = io_lib.read_img(file, length, width) pngfile = file + '.png' ## Get color range if None if cmins[i] is None: cmins[i] = np.nanpercentile(data, 1) if cmaxs[i] is None: cmaxs[i] = np.nanpercentile(data, 99) if cmins[i] == cmaxs[i]: cmins[i] = cmaxs[i] - 1 plot_lib.make_im_png(data, pngfile, cmaps[i], titles[i], cmins[i], cmaxs[i]) #%% Finish elapsed_time = time.time() - start hour = int(elapsed_time / 3600) minite = int(np.mod((elapsed_time / 60), 60)) sec = int(np.mod(elapsed_time, 60)) print("\nElapsed time: {0:02}h {1:02}m {2:02}s".format(hour, minite, sec)) print('\n{} Successfully finished!!\n'.format(os.path.basename(argv[0]))) print('Output directory: {}\n'.format(os.path.relpath(tsadir)))
""" Runs the web server """ from os import sched_getaffinity from . import application application.run(host='0.0.0.0', port=8080, workers=len(sched_getaffinity(0)))
def log_execution_env_state(config_path=None, logdir=None, gitroot='.'): """Log information about the execution environment. File 'config_path' will be copied to directory 'logdir'. A common use-case is passing the path to a (compression) schedule YAML file. Storing a copy of the schedule file, with the experiment logs, is useful in order to reproduce experiments. Args: config_path: path to config file, used only when logdir is set logdir: log directory git_root: the path to the .git root directory """ def log_git_state(): """Log the state of the git repository. It is useful to know what git tag we're using, and if we have outstanding code. """ try: repo = Repo(gitroot) assert not repo.bare except InvalidGitRepositoryError: logger.debug("Cannot find a Git repository. You probably downloaded an archive of Distiller.") return if repo.is_dirty(): logger.debug("Git is dirty") try: branch_name = repo.active_branch.name except TypeError: branch_name = "None, Git is in 'detached HEAD' state" logger.debug("Active Git branch: %s", branch_name) logger.debug("Git commit: %s" % repo.head.commit.hexsha) logger.debug("Number of CPUs: %d", len(os.sched_getaffinity(0))) logger.debug("Number of GPUs: %d", torch.cuda.device_count()) logger.debug("CUDA version: %s", torch.version.cuda) logger.debug("CUDNN version: %s", torch.backends.cudnn.version()) logger.debug("Kernel: %s", platform.release()) if HAVE_LSB: logger.debug("OS: %s", lsb_release.get_lsb_information()['DESCRIPTION']) logger.debug("Python: %s", sys.version) logger.debug("PyTorch: %s", torch.__version__) logger.debug("Numpy: %s", np.__version__) log_git_state() logger.debug("Command line: %s", " ".join(sys.argv)) if (logdir is None) or (config_path is None): return # clone configuration files to output directory configs_dest = os.path.join(logdir, 'configs') with contextlib.suppress(FileExistsError): os.makedirs(configs_dest) if os.path.exists(os.path.join(configs_dest, os.path.basename(config_path))): logger.debug('{} already exists in logdir'.format( os.path.basename(config_path) or config_path)) else: try: shutil.copy(config_path, configs_dest) except OSError as e: logger.debug('Failed to copy of config file: {}'.format(str(e)))
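A hypothetical call site for the helper above, assuming a compression-schedule YAML and a per-run log directory created earlier in the program; both paths are illustrative only:

log_execution_env_state(config_path='schedules/example_schedule.yaml',
                        logdir='logs/2020.01.01-000000',
                        gitroot='.')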
def main(argv): levels = [ logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL ] hashseed = os.environ.get("PYTHONHASHSEED", "random") random.seed(None) seedstr = ''.join(random.SystemRandom().choice(ALPHABET) for _ in range(20)) seedstr = os.environ.get("LONGLIFESEED", seedstr) inputfile = None visual = True fps = 25 studentAgent = StudentAgent debug = 4 calibrate = False try: opts, args = getopt.getopt(argv, "hm:s:vf:cd:", [ "help", "map=", "student-agent=", "no-video", "fps=", "calibrate", "debug=" ]) except getopt.GetoptError as e: print(e) print(USAGE) sys.exit(2) for opt, arg in opts: if opt == '-h': print(USAGE) sys.exit() elif opt in ["-m", "--map"]: inputfile = arg elif opt in ["-s", "--student-agent"]: classmodule = importlib.import_module(arg.lower()) studentAgent = getattr(classmodule, arg) elif opt in ["-v", "--no-video"]: visual = False elif opt in ["-f", "--fps"]: fps = int(arg) elif opt in ["-c", "--calibrate"]: calibrate = True elif opt in ["-d", "--debug"]: debug = int(arg) logging.basicConfig(format='%(levelname)s:\t%(message)s', level=levels[debug]) try: # Try to pin process to a single CPU (to have more predictable times) aff = os.sched_getaffinity(0) logging.debug("Original affinity: {}".format(aff)) cpu = aff.pop() os.sched_setaffinity( 0, [cpu]) # TODO: does not seem to make a difference... aff = os.sched_getaffinity(0) logging.debug("New affinity: {}".format(aff)) except: # Operating System may not support setaffinity. Not a big deal. logging.debug("Could not set CPU affinity for the process.") try: # print("Launching game. PYTHONHASHSEED={} LONGLIFESEED={}".format(hashseed, seedstr)) # logging.info("Launching game. PYTHONHASHSEED={} LONGLIFESEED={}".format(hashseed, seedstr)) # logging.info("cwd={!r} argv={!r} path={!r}".format(os.getcwd(), sys.argv, sys.path)) game = AgentGame(AgentClass=studentAgent, width=60, height=40, filename=inputfile, walls=15, foodquant=1, timeslot=0.020, calibrate=calibrate, visual=visual, fps=fps, tilesize=20, seeds=(seedstr[::2], seedstr[1::2])) score = game.start() print(score) except Exception as e: logging.exception(e) sys.exit(1)
# Import packages for cleaning, tokenizing, and stemming text
import re  # For parsing text
from unicodedata import normalize  # For cleaning text by converting unicode character encodings into readable form
import nltk
from nltk import word_tokenize, sent_tokenize  # Widely used text tokenizers
from nltk.stem.porter import PorterStemmer  # Approximate but common way to normalize words: stems them by transforming or cutting off word endings
stem = PorterStemmer().stem  # Makes stemming more accessible
from nltk.corpus import stopwords  # For eliminating stop words
import gensim  # For word embedding models
from gensim.models.phrases import Phrases  # Makes word2vec more robust: looks for multi-word phrases, not just single tokens

# Import packages for multiprocessing
import os  # For navigation
numcpus = len(os.sched_getaffinity(0))  # Detect and assign number of available CPUs
from multiprocessing import Pool  # Key class for multiprocessing, to increase processing speed
pool = Pool(processes=numcpus)  # Pre-load number of CPUs into pool function
import Cython  # For parallelizing word2vec

mpdo = False  # Set to 'True' if using multiprocessing--faster for creating the words-by-sentence file, but more complicated

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('words')

# Other imports
from random import randint
import numpy as np
import torch
from numpy import dot, absolute
from numpy.linalg import norm
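A minimal sketch of how the stemmer and the Phrases model set up above might be exercised on a toy corpus; the sentences are illustrative and the exact bigrams found depend on `min_count` and `threshold`:

from nltk.stem.porter import PorterStemmer
from gensim.models.phrases import Phrases

stem = PorterStemmer().stem
print(stem('running'))  # Porter simply strips the ending: 'run'

# Phrases learns frequent multi-word expressions from tokenized sentences.
toy_sentences = [['new', 'york', 'city'], ['new', 'york', 'times'],
                 ['los', 'angeles', 'times']]
bigram_model = Phrases(toy_sentences, min_count=1, threshold=1)
print(bigram_model[['new', 'york', 'city']])  # e.g. ['new_york', 'city']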
        # save the dataframe (pdf) data into csv
        save_to_csv(df, PARSE_DATA_CSVS + t[0] + ".csv")
        print("CSV saved")
    except Exception as e:
        print('ERROR:', e, pdf_file_name_without_ext)
        traceback.print_exc()
    finally:
        print("Clean up working files...")
        shutil.rmtree(input_pdf_images_path, ignore_errors=True)
        shutil.rmtree(input_images_blocks_path, ignore_errors=True)

    end_time = time.time()
    return pdf_file_name_without_ext, end_time - begin_time


if __name__ == '__main__':
    print('Tesseract Version:', pytesseract.get_tesseract_version())
    print('multiprocessing cpu_count:', multiprocessing.cpu_count())
    print('os cpu_count:', os.cpu_count())
    print('sched_getaffinity:', len(os.sched_getaffinity(0)))

    # a_pool = multiprocessing.Pool(multiprocessing.cpu_count())
    # results = a_pool.map(pdf_process, state_pdfs_files)
    with futures.MPIPoolExecutor() as executor:
        results = executor.map(pdf_process, state_pdfs_files)
        for res in results:
            print(res)
def getthreads():
    if threads == 'auto':
        return len(sched_getaffinity(0))
    return threads
def num_cpus() -> int:
    "Get number of cpus"
    try:
        return len(os.sched_getaffinity(0))
    except AttributeError:
        return os.cpu_count()
def train(args): """Training helper.""" if not args.model.lower() in ['cbow', 'skipgram']: logging.error('Unsupported model %s.', args.model) sys.exit(1) if args.data.lower() == 'toy': data = mx.gluon.data.SimpleDataset(nlp.data.Text8(segment='train')[:2]) data, vocab, idx_to_counts = preprocess_dataset( data, max_vocab_size=args.max_vocab_size) elif args.data.lower() == 'text8': data = nlp.data.Text8(segment='train') data, vocab, idx_to_counts = preprocess_dataset( data, max_vocab_size=args.max_vocab_size) elif args.data.lower() == 'fil9': data = nlp.data.Fil9(max_sentence_length=10000) data, vocab, idx_to_counts = preprocess_dataset( data, max_vocab_size=args.max_vocab_size) elif args.data.lower() == 'wiki': data, vocab, idx_to_counts = wiki(args.wiki_root, args.wiki_date, args.wiki_language, args.max_vocab_size) if args.ngram_buckets > 0: data, batchify_fn, subword_function = transform_data_fasttext( data, vocab, idx_to_counts, cbow=args.model.lower() == 'cbow', ngram_buckets=args.ngram_buckets, ngrams=args.ngrams, batch_size=args.batch_size, window_size=args.window, frequent_token_subsampling=args.frequent_token_subsampling) else: subword_function = None data, batchify_fn = transform_data_word2vec( data, vocab, idx_to_counts, cbow=args.model.lower() == 'cbow', batch_size=args.batch_size, window_size=args.window, frequent_token_subsampling=args.frequent_token_subsampling) num_tokens = float(sum(idx_to_counts)) model = CBOW if args.model.lower() == 'cbow' else SG embedding = model(token_to_idx=vocab.token_to_idx, output_dim=args.emsize, batch_size=args.batch_size, num_negatives=args.negative, negatives_weights=mx.nd.array(idx_to_counts), subword_function=subword_function) context = get_context(args) embedding.initialize(ctx=context) if not args.no_hybridize: embedding.hybridize(static_alloc=True, static_shape=True) optimizer_kwargs = dict(learning_rate=args.lr) try: trainer = mx.gluon.Trainer(embedding.collect_params(), args.optimizer, optimizer_kwargs) except ValueError as e: if args.optimizer == 'groupadagrad': logging.warning('MXNet <= v1.3 does not contain ' 'GroupAdaGrad support. Falling back to AdaGrad') trainer = mx.gluon.Trainer(embedding.collect_params(), 'adagrad', optimizer_kwargs) else: raise e try: if args.no_prefetch_batch: data = data.transform(batchify_fn) else: from executors import LazyThreadPoolExecutor num_cpu = len(os.sched_getaffinity(0)) ex = LazyThreadPoolExecutor(num_cpu) except (ImportError, SyntaxError, AttributeError): # Py2 - no async prefetching is supported logging.warning( 'Asynchronous batch prefetching is not supported on Python 2. 
' 'Consider upgrading to Python 3 for improved performance.') data = data.transform(batchify_fn) num_update = 0 prefetched_iters = [] for _ in range(min(args.num_prefetch_epoch, args.epochs)): prefetched_iters.append(iter(data)) for epoch in range(args.epochs): if epoch + len(prefetched_iters) < args.epochs: prefetched_iters.append(iter(data)) data_iter = prefetched_iters.pop(0) try: batches = ex.map(batchify_fn, data_iter) except NameError: # Py 2 or batch prefetching disabled batches = data_iter # Logging variables log_wc = 0 log_start_time = time.time() log_avg_loss = 0 for i, batch in enumerate(batches): ctx = context[i % len(context)] batch = [array.as_in_context(ctx) for array in batch] with mx.autograd.record(): loss = embedding(*batch) loss.backward() num_update += loss.shape[0] if len(context) == 1 or (i + 1) % len(context) == 0: trainer.step(batch_size=1) # Logging log_wc += loss.shape[0] log_avg_loss += loss.mean().as_in_context(context[0]) if (i + 1) % args.log_interval == 0: # Forces waiting for computation by computing loss value log_avg_loss = log_avg_loss.asscalar() / args.log_interval wps = log_wc / (time.time() - log_start_time) # Due to subsampling, the overall number of batches is an upper # bound num_batches = num_tokens // args.batch_size if args.model.lower() == 'skipgram': num_batches = (num_tokens * args.window * 2) // args.batch_size else: num_batches = num_tokens // args.batch_size logging.info('[Epoch {} Batch {}/{}] loss={:.4f}, ' 'throughput={:.2f}K wps, wc={:.2f}K'.format( epoch, i + 1, num_batches, log_avg_loss, wps / 1000, log_wc / 1000)) log_start_time = time.time() log_avg_loss = 0 log_wc = 0 if args.eval_interval and (i + 1) % args.eval_interval == 0: with print_time('mx.nd.waitall()'): mx.nd.waitall() with print_time('evaluate'): evaluate(args, embedding, vocab, num_update) # Evaluate with print_time('mx.nd.waitall()'): mx.nd.waitall() with print_time('evaluate'): evaluate(args, embedding, vocab, num_update, eval_analogy=not args.no_eval_analogy) # Save params with print_time('save parameters'): embedding.save_parameters(os.path.join(args.logdir, 'embedding.params'))
test_finished_join_timeout = 15
retries = parse_digit_env("RETRIES", 0)

debug = os.getenv("DEBUG", "n").lower() in ["gdb", "gdbserver"]
debug_core = os.getenv("DEBUG", "").lower() == "core"
compress_core = framework.BoolEnvironmentVariable("CORE_COMPRESS")

step = framework.BoolEnvironmentVariable("STEP")
force_foreground = framework.BoolEnvironmentVariable("FORCE_FOREGROUND")
run_interactive = debug or step or force_foreground

try:
    num_cpus = len(os.sched_getaffinity(0))
except AttributeError:
    num_cpus = multiprocessing.cpu_count()
shm_free = psutil.disk_usage('/dev/shm').free
print('OS reports %s available cpu(s). Free shm: %s' % (
    num_cpus, "{:,}MB".format(shm_free / (1024 * 1024))))

test_jobs = os.getenv("TEST_JOBS", "1").lower()  # default = 1 process
if test_jobs == 'auto':
    if run_interactive:
        concurrent_tests = 1
        print('Interactive mode required, running on one core')
    else:
        shm_max_processes = 1
        if shm_free < min_req_shm:
def main(): n_envs = len(os.sched_getaffinity(0)) factory = FallingEnvFactory() # factory = HalfCheetahEnvFactory() # factory = HumanoidFallingEnvFactory() env: Env = factory.make_env() envs: VectorEnv = AsyncVectorEnv([factory.make_env for _ in range(n_envs)]) env_container = EnvContainer(env, envs) state_dim, = env.observation_space.shape action_dim, = env.action_space.shape relu = nn.ReLU() tanh = nn.Tanh() identity = nn.Identity() actor = ProbMLPConstantLogStd(state_dim, action_dim, [256, 256], relu, tanh, -1.0) critic = MultiLayerPerceptron(state_dim, 1, [256, 256], relu, identity) scaler_ = StandardScaler() print("Fit scaler") env.reset() state_seq = [] for _ in tqdm(range(512)): action = env.action_space.sample() state, _, done, _ = env.step(action) state_seq.append(state) if done: env.reset() state_seq = np.stack(state_seq) scaler_.fit(state_seq) scaler = ScalerNet(scaler_) module_dict = ModuleDict() module_dict.set(ModuleKey.actor, actor) module_dict.set(ModuleKey.scaler, scaler) module_dict.set(ModuleKey.critic, critic) action_getter: ActionGetter = ActionGetterModule(actor, scaler) sample_collector: SampleCollector = SampleCollectorV0(env_container, action_getter, 2048, 1) mse_loss = nn.MSELoss() critic_tensor_inserter: TensorInserter = \ TensorInserterTensorize(ArrayKey.states, TensorKey.states_tensor) + \ TensorInserterTensorize(ArrayKey.log_probs, TensorKey.log_probs_tensor) + \ TensorInserterTensorize(ArrayKey.cumulative_rewards, TensorKey.cumulative_rewards_tensor) + \ TensorInserterForward(TensorKey.states_tensor, ModuleKey.scaler, TensorKey.states_tensor) + \ TensorInserterForward(TensorKey.states_tensor, ModuleKey.critic, TensorKey.cumulative_reward_predictions_tensor) critic_loss_calculator: LossCalculator = \ LossCalculatorInputTarget(TensorKey.cumulative_reward_predictions_tensor, TensorKey.cumulative_rewards_tensor, mse_loss) actor_tensor_inserter: TensorInserter = \ TensorInserterTensorize(ArrayKey.states, TensorKey.states_tensor) + \ TensorInserterTensorize(ArrayKey.actions, TensorKey.actions_tensor) + \ TensorInserterTensorize(ArrayKey.log_probs, TensorKey.log_probs_tensor) + \ TensorInserterTensorize(ArrayKey.cumulative_rewards, TensorKey.cumulative_rewards_tensor) + \ TensorInserterForward(TensorKey.states_tensor, ModuleKey.scaler, TensorKey.states_tensor) + \ TensorInserterForward(TensorKey.states_tensor, ModuleKey.critic, TensorKey.cumulative_reward_predictions_tensor) + \ TensorInserterLambda([TensorKey.cumulative_rewards_tensor, TensorKey.cumulative_reward_predictions_tensor], lambda x, y: x - y, TensorKey.advantages_tensor) + \ TensorInserterModuleLambda(ModuleKey.actor, [TensorKey.states_tensor, TensorKey.actions_tensor], lambda actor, state, action: actor.get_log_prob(state, action), TensorKey.new_log_probs_tensor) + \ TensorInserterLambda([TensorKey.new_log_probs_tensor, TensorKey.log_probs_tensor, TensorKey.advantages_tensor], get_ppo_surrogate_tensor, TensorKey.ppo_surrogates_tensor) actor_loss_calculator: LossCalculator = \ LossCalculatorLambda([TensorKey.ppo_surrogates_tensor], lambda x: -torch.mean(x)) actor_optimizer = RAdam(params=actor.parameters(), lr=3e-4) actor_updater: ModuleUpdater = ModuleUpdaterOptimizer(actor_optimizer) critic_optimizer = RAdam(params=critic.parameters(), lr=3e-4) critic_updater: ModuleUpdater = ModuleUpdaterOptimizer(critic_optimizer) actor_trainee = Trainee([actor], actor_updater, actor_tensor_inserter, actor_loss_calculator, 10) critic_trainee = Trainee([critic], critic_updater, critic_tensor_inserter, 
critic_loss_calculator, 10) trainer = RLTrainer(sample_collector, [critic_trainee, actor_trainee], 100000, 128) trainer.train(module_dict)
def _parallel_record_metadata(self, files: List[File]) -> List[Metadata]:
    pool = Pool(len(sched_getaffinity(0)))
    metadata = pool.map(self._get_record_metadata, files)
    pool.close()
    pool.join()
    return metadata
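Since Python 3.3, `multiprocessing.Pool` is also a context manager, so the manual close/join pair can be replaced by a `with` block; `__exit__` calls `terminate()`, which is safe here because `map()` has already collected every result. A hedged standalone sketch that keeps the same Linux-only affinity sizing (the function and argument names are illustrative):

from multiprocessing import Pool
from os import sched_getaffinity


def parallel_map(func, items):
    # One worker per CPU the current process is allowed to run on.
    with Pool(len(sched_getaffinity(0))) as pool:
        return pool.map(func, items)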
def test(root_dir, install_only, debug, max_dim, short, no_pretty, spy, prof, gcov, hdf5, cuda, openmp, python, jobs, env): if 'TRAVIS' in env: install_threads = ['-j', '2'] test_threads = ['-j', '2'] else: # the multiprocessing pool in test.py will use os.cpu_count() which # assumes we only need one core/test (it's really 2+) and that there's # no cpu core restrictions (which can happen if multiple test runners share # a single physical node), so do the math ourselves try: num_cores = len(os.sched_getaffinity(0)) except AttributeError: # macos doesn't have sched_getaffinity num_cores = multiprocessing.cpu_count() install_threads = ['-j', str(num_cores)] # assume a non-empty LAUNCHER means we're running 2 processes/test if env.get('LAUNCHER'): cores_per_test = 4 else: cores_per_test = 2 num_tests = 1 + ((num_cores - 1) // cores_per_test) test_threads = ['-j', str(num_tests)] terra = ['--with-terra', env['TERRA_DIR']] if 'TERRA_DIR' in env else [] build = (['--with-cmake-build', env['CMAKE_BUILD_DIR']] if env.get('USE_CMAKE') == '1' and 'CMAKE_BUILD_DIR' in env else []) debug_flag = ['--debug'] if debug else [] max_dim_flag = ['--max-dim=%s' % max_dim] short_flag = ['--short'] if short else [] no_pretty_flag = ['--no-pretty'] if no_pretty else [] inner_flag = ['--extra=-flegion-inner', '--extra=0' ] if 'DISABLE_INNER' in env else [] if 'USE_RDIR' in env: regent_dir = os.path.dirname(os.path.realpath(__file__)) rdir_config = os.path.join(regent_dir, '.rdir.json') if env['USE_RDIR'] == '1' and not os.path.exists(rdir_config): rdir = 'auto' else: rdir = 'skip' else: rdir = 'auto' subprocess.check_call([sys.executable, './install.py', '--rdir=%s' % rdir] + install_threads + terra + build + debug_flag, env=env, cwd=root_dir) if not install_only: extra_flags = [] if spy: extra_flags.append('--spy') if prof: extra_flags.append('--prof') if gcov: extra_flags.append('--run') if hdf5: extra_flags.append('--hdf5') if cuda: extra_flags.append('--cuda') test_threads = ['-j', '1'] # do not oversubscribe GPU if openmp: extra_flags.append('--openmp') if python: extra_flags.append('--python') extra_flags.extend(['--extra=-fjobs', '--extra=%s' % jobs]) # FIXME: this breaks on newer versions of Terra # if not spy and not prof and not gcov and not hdf5 and not openmp and not cuda: # extra_flags.append('--debug') if prof and 'TMP_BIN_DIR' in env: extra_flags.append( '--legion-prof-rs=%s' % (os.path.join(env['TMP_BIN_DIR'], 'legion_prof'))) subprocess.check_call([sys.executable, './test.py', '-q'] + test_threads + max_dim_flag + short_flag + no_pretty_flag + extra_flags + inner_flag, env=env, cwd=root_dir)
def cpu_count():
    # type: () -> Optional[int]
    # The set of CPUs accessible to the current process (pid 0).
    cpu_set = os.sched_getaffinity(0)
    return len(cpu_set)
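As written, this raises AttributeError rather than returning None on platforms without `sched_getaffinity`. A hedged, portable variant, assuming a fallback to `os.cpu_count()` is acceptable there (the name `cpu_count_portable` is illustrative):

import os
from typing import Optional


def cpu_count_portable() -> Optional[int]:
    if hasattr(os, 'sched_getaffinity'):
        # The set of CPUs accessible to the current process (pid 0).
        return len(os.sched_getaffinity(0))
    return os.cpu_count()  # may itself be None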
def get_worker_num():
    try:
        worker_num = len(os.sched_getaffinity(0))
    except AttributeError:
        worker_num = os.cpu_count() or 1
    return worker_num
def complete_search(): variant = True if "--help" in input_args: print( "This is the automated search process that goes from raw input up to the post-analysis of results." ) print( "These are the flags that must be used in order to run this function:" ) print("\t--genome, used to specify the reference genome folder") print( "\t--vcf, used to specify the file containing a list of VCF folders (one per line) [OPTIONAL!]" ) print( "\t--guide, used to specify the file that contains guides used for the search [IF NOT --sequence]" ) print( "\t--sequence, used to specify the file containing DNA sequences or bed coordinates to extract guides [IF NOT --guide]" ) print("\t--pam, used to specify the file that contains the pam") print( "\t--annotation, used to specify the file that contains annotations of the reference genome" ) print( "\t--personal_annotation, used to specify the file that contains personal annotations of the reference genome" ) print( "\t--samplesID, used to specify the file with a list of files (one per line) containing the information about samples present in VCF files [OPTIONAL!]" ) print( "\t--gene_annotation, used to specify a gencode or similar annotation to find nearest gene for each target found [OPTIONAL]" ) print( "\t--bMax, used to specify the number of bulges for the indexing of the genome(s)" ) print( "\t--mm, used to specify the number of mismatches permitted in the search phase" ) print( "\t--bDNA, used to specify the number of DNA bulges permitted in the search phase [OPTIONAL!]" ) print( "\t--bRNA, used to specify the number of RNA bulges permitted in the search phase [OPTIONAL!]" ) print( "\t--output, used to specify the output name for the results (these results will be saved into Results/<name>)" ) print( "\t--thread, used to set the number of thread used in the process (default is ALL available minus 2)" ) exit(0) # check if all directories are found, if not, create them directoryCheck() if '--guide' not in input_args and '--sequence' not in input_args: print('Please input a guide file or a sequence file') exit(1) if '--guide' in input_args and '--sequence' in input_args: print( 'Please select only ONE input type, either --guide or --sequence') exit(1) # guide check if "--guide" in input_args: try: guidefile = os.path.abspath( input_args[input_args.index("--guide") + 1]) except IndexError: print("Please input some parameter for flag --guide") exit(1) if not os.path.isfile(guidefile): print("The file specified for --guide does not exist") exit(1) # sequence check sequence_use = False if '--sequence' in input_args: try: sequence_file = os.path.abspath( input_args[input_args.index("--sequence") + 1]) sequence_use = True except IndexError: print("Please input some parameter for flag --sequence") exit(1) if not os.path.isfile(sequence_file): print("The file specified for --sequence does not exist") exit(1) if "--genome" not in input_args: print("--genome must be contained in the input") exit(1) else: try: genomedir = os.path.abspath( input_args[input_args.index("--genome") + 1]) except IndexError: print("Please input some parameter for flag --genome") exit(1) if not os.path.isdir(genomedir): print("The folder specified for --genome does not exist") exit(1) if "--thread" not in input_args: # print("--thread must be contained in the input") # exit(1) thread = len(os.sched_getaffinity(0)) - 2 else: try: thread = input_args[input_args.index("--thread") + 1] except IndexError: print("Please input some parameter for flag --thread") exit(1) try: thread = int(thread) except: 
print("Please input a number for flag thread") exit(1) if thread <= 0 or thread > len(os.sched_getaffinity(0)) - 2: print("thread is set to default (ALL available minus 2)") thread = len(os.sched_getaffinity(0)) - 2 # exit(1) if "--vcf" not in input_args: variant = False else: try: vcfdir = os.path.realpath(input_args[input_args.index("--vcf") + 1]) except IndexError: print("Please input some parameter for flag --vcf") exit(1) if not os.path.isfile(vcfdir): print("The file specified for --vcf does not exist") exit(1) if "--gene_annotation" not in input_args: gene_annotation = 'no' else: try: gene_annotation = os.path.abspath( input_args[input_args.index("--gene_annotation") + 1]) except IndexError: print("Please input some parameter for flag --gene_annotation") exit(1) if not os.path.isfile(gene_annotation): print("The file specified for --gene_annotation does not exist") exit(1) if "--pam" not in input_args: print("--pam must be contained in the input") exit(1) else: try: pamfile = os.path.abspath(input_args[input_args.index("--pam") + 1]) except IndexError: print("Please input some parameter for flag --pam") exit(1) if not os.path.isfile(pamfile): print("The file specified for --pam does not exist") exit(1) if "--annotation" not in input_args: print("--annotation not used") annotationfile = script_path + 'vuoto.txt' # exit(1) else: try: annotationfile = os.path.abspath( input_args[input_args.index("--annotation") + 1]) except IndexError: print("Please input some parameter for flag --annotation") exit(1) if not os.path.isfile(annotationfile): print("The file specified for --annotation does not exist") exit(1) if '--personal_annotation' in input_args: try: personal_annotation_file = os.path.abspath( input_args[input_args.index("--personal_annotation") + 1]) except: pass if not os.path.isfile(personal_annotation_file): print( "The file specified for --personal_annotation does not exist" ) exit(1) os.system( f'awk \'$4 = $4\"_personal\"\' {personal_annotation_file} | sed "s/ /\t/g" | sed "s/,/_personal,/g" > {personal_annotation_file}.tmp' ) os.system( f'cat {personal_annotation_file}.tmp {annotationfile} > {annotationfile}+personal.bed' ) os.system(f'rm -f {personal_annotation_file}.tmp') annotationfile = annotationfile + '+personal.bed' if '--personal_annotation' in input_args and '--annotation' not in input_args: try: personal_annotation_file = os.path.abspath( input_args[input_args.index("--personal_annotation") + 1]) except: pass if not os.path.isfile(personal_annotation_file): print( "The file specified for --personal_annotation does not exist") exit(1) os.system( f'awk \'$4 = $4\"_personal\"\' {personal_annotation_file} | sed "s/ /\t/g" | sed "s/,/_personal,/g" > {personal_annotation_file}.tmp' ) os.system( f'cat {personal_annotation_file}.tmp {annotationfile} > {annotationfile}+personal.bed' ) os.system(f'rm -f {personal_annotation_file}.tmp') annotationfile = annotationfile + '+personal.bed' if variant and "--samplesID" not in input_args: print("--samplesID must be contained in the input") exit(1) elif not variant and "--samplesID" in input_args: print( "--samplesID was in the input but no VCF directory was specified") exit(1) elif "--samplesID" in input_args: try: samplefile = os.path.abspath( input_args[input_args.index("--samplesID") + 1]) except IndexError: print("Please input some parameter for flag --samplesID") exit(1) if not os.path.isfile(samplefile): print("The file specified for --samplesID does not exist") exit(1) if "--bMax" not in input_args: print("--bMax must be 
contained in the input") exit(1) else: try: bMax = input_args[input_args.index("--bMax") + 1] except IndexError: print("Please input some parameter for flag --bMax") exit(1) try: bMax = int(bMax) except: print("Please input a number for flag bMax") exit(1) # if bMax < 0 or bMax > 2: # print("The range for bMax is from 0 to 2") # exit(1) if "--mm" not in input_args: print("--mm must be contained in the input") exit(1) else: try: mm = input_args[input_args.index("--mm") + 1] except IndexError: print("Please input some parameter for flag --mm") exit(1) try: mm = int(mm) except: print("Please input a number for flag mm") exit(1) if "--bDNA" not in input_args: # print("--bDNA must be contained in the input") # exit(1) bDNA = 0 else: try: bDNA = input_args[input_args.index("--bDNA") + 1] except IndexError: print("Please input some parameter for flag --bDNA") exit(1) try: bDNA = int(bDNA) except: print("Please input a number for flag bDNA") exit(1) if bDNA > bMax: print("The number of bDNA must be equal or less than bMax") exit(1) elif bDNA < 0 or bDNA > bMax: print("The range for bDNA is from 0 to", bMax) exit(1) if "--bRNA" not in input_args: # print("--bRNA must be contained in the input") # exit(1) bRNA = 0 else: try: bRNA = input_args[input_args.index("--bRNA") + 1] except IndexError: print("Please input some parameter for flag --bRNA") exit(1) try: bRNA = int(bRNA) except: print("Please input a number for flag bRNA") exit(1) if bRNA > bMax: print("The number of bRNA must be equal or less than bMax") exit(1) # elif bRNA < 0 or bRNA > 2: # print("The range for bRNA is from 0 to", bMax) # exit(1) if "--merge" not in input_args: merge_t = 3 # default merge is 3 nt else: try: merge_t = input_args[input_args.index("--merge") + 1] except IndexError: print("Please input some parameter for flag --merge") exit(1) try: merge_t = int(merge_t) except: print("Please input a number for flag merge") exit(1) if merge_t < 0: print("Please specify a positive number for --merge") exit(1) if "--output" not in input_args: print("--output must be contained in the input") exit(1) else: try: outputfolder = current_working_directory+'Results/' + \ input_args[input_args.index("--output")+1] if not os.path.exists(outputfolder): os.makedirs(outputfolder) # outputfolder = os.path.abspath( # input_args[input_args.index("--output")+1]) except IndexError: print("Please input some parameter for flag --output") exit(1) if not os.path.isdir(outputfolder): print("The folder specified for --output does not exist") exit(1) pam_len = 0 total_pam_len = 0 with open(pamfile, 'r') as pam_file: pam_char = pam_file.readline() total_pam_len = len(pam_char.split(' ')[0]) index_pam_value = pam_char.split(' ')[-1] if int(pam_char.split(' ')[-1]) < 0: end_idx = int(pam_char.split(' ')[-1]) * (-1) pam_char = pam_char.split(' ')[0][0:end_idx] pam_len = end_idx pam_begin = True else: end_idx = int(pam_char.split(' ')[-1]) pam_char = pam_char.split(' ')[0][end_idx * (-1):] pam_len = end_idx pam_begin = False genome_ref = os.path.basename(genomedir) annotation_name = os.path.basename(annotationfile) nuclease = os.path.basename(pamfile).split('.')[0].split('-')[2] if bMax != 0: search_index = True else: search_index = False if variant: genome_idx_list = [] with open(vcfdir, 'r') as vcfs: for line in vcfs: if line.strip(): if line[-2] == "/": line = line[:-2] base_vcf = os.path.basename(line) genome_idx_list.append(pam_char + '_' + str(bMax) + '_' + genome_ref + '+' + base_vcf.strip()) genome_idx = ','.join(genome_idx_list) ref_comparison = True 
else: genome_idx = pam_char + '_' + str(bMax) + '_' + genome_ref ref_comparison = False # os.chdir(script_path) with open(outputfolder + '/Params.txt', 'w') as p: p.write('Genome_selected\t' + genome_ref.replace(' ', '_') + '\n') p.write('Genome_ref\t' + genome_ref + '\n') if search_index: p.write('Genome_idx\t' + genome_idx + '\n') else: p.write('Genome_idx\t' + 'None\n') p.write('Pam\t' + pam_char + '\n') p.write('Max_bulges\t' + str(bMax) + '\n') p.write('Mismatches\t' + str(mm) + '\n') p.write('DNA\t' + str(bDNA) + '\n') p.write('RNA\t' + str(bRNA) + '\n') p.write('Annotation\t' + str(annotation_name) + '\n') p.write('Nuclease\t' + str(nuclease) + '\n') # p.write('Gecko\t' + str(gecko_comp) + '\n') p.write('Ref_comp\t' + str(ref_comparison) + '\n') p.close() len_guide_sequence = total_pam_len - pam_len if sequence_use: guides = list() text_sequence = str() for line in open(sequence_file, 'r'): text_sequence += line for name_and_seq in text_sequence.split('>'): if '' == name_and_seq: continue name = name_and_seq[:name_and_seq.find('\n')] seq = name_and_seq[name_and_seq.find('\n'):] # seq = seq.strip().split() # seq = ''.join(seq) seq = seq.strip() # name, seq = name_and_seq.strip().split('\n') if 'chr' in seq: # extracted_seq = extract_seq.extractSequence( # name, seq, genome_ref.replace(' ', '_')) for single_row in seq.split('\n'): if '' == single_row: continue pieces_of_row = single_row.strip().split() seq_to_extract = pieces_of_row[0]+":" + \ pieces_of_row[1]+"-"+pieces_of_row[2] extracted_seq = extractSequence( name, seq_to_extract, genome_ref.replace(' ', '_')) guides.extend( getGuides(extracted_seq, pam_char, len_guide_sequence, pam_begin)) else: seq = seq.split() seq = ''.join(seq) extracted_seq = seq.strip() guides.extend( getGuides(extracted_seq, pam_char, len_guide_sequence, pam_begin)) temp_guides = list() for guide in guides: addN = 'N' * pam_len if pam_begin: temp_guides.append(addN + guide) else: temp_guides.append(guide + addN) if len(temp_guides) > 1000000000: temp_guides = temp_guides[:1000000000] guides = temp_guides extracted_guides_file = open(outputfolder + '/guides.txt', 'w') for guide in guides: extracted_guides_file.write(guide + '\n') extracted_guides_file.close() # print(guides) # exit(0) if sequence_use == False: os.system(f'cp {guidefile} {outputfolder}/guides.txt') print( f"Launching job {outputfolder}. 
The stdout is redirected in log_verbose.txt and stderr is redirected in log_error.txt" ) if variant: with open(f"{outputfolder}/log_verbose.txt", 'w') as log_verbose: with open(f"{outputfolder}/log_error.txt", 'w') as log_error: subprocess.run([ script_path + './submit_job_automated_new_multiple_vcfs.sh', str(genomedir), str(vcfdir), str(outputfolder) + "/guides.txt", str(pamfile), str(annotationfile), str(samplefile), str(bMax), str(mm), str(bDNA), str(bRNA), str(merge_t), str(outputfolder), str(script_path), str(thread), str(current_working_directory), str(gene_annotation) ], stdout=log_verbose, stderr=log_error) else: with open(f"{outputfolder}/log_verbose.txt", 'w') as log_verbose: with open(f"{outputfolder}/log_error.txt", 'w') as log_error: subprocess.run([ script_path + './submit_job_automated_new_multiple_vcfs.sh', str(genomedir), '_', str(outputfolder) + "/guides.txt", str(pamfile), str(annotationfile), str(script_path + 'vuoto.txt'), str(bMax), str(mm), str(bDNA), str(bRNA), str(merge_t), str(outputfolder), str(script_path), str(thread), str(current_working_directory), str(gene_annotation) ], stdout=log_verbose, stderr=log_error) # change name of guide and param files to hidden os.system(f"mv {outputfolder}/guides.txt {outputfolder}/.guides.txt") os.system(f"mv {outputfolder}/Params.txt {outputfolder}/.Params.txt")
import numpy as np
from simplexTheory import *
import simplexUtilities
import multiprocessing as mp
from datetime import datetime
import time
import os

gamma = 2
isDegreeCorrelated = False
type = "power-law"
minDegree = 50
maxDegreeList = np.linspace(100, 1000, 37)
exponentList = np.linspace(2.5, 4.0, 31)

numProcesses = len(os.sched_getaffinity(0))
print("Number of cores is " + str(numProcesses))

digits = 4
tolerance = 0.0001
option = "fast"
minAlpha = 0.0
maxAlpha = 0.1

m = np.size(maxDegreeList, 0)
n = np.size(exponentList, 0)
betaCritGrid = np.zeros([m, n])
expansionRatioGrid = np.zeros([m, n])

argList = list()
for i in range(m):
    for j in range(n):
def build_project(project_dir, num_processes=0):
    if num_processes == 0:
        num_processes = len(os.sched_getaffinity(0))
    subprocess.check_call(['ndk-build', '-j%d' % num_processes,
                           '-C', project_dir])
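A hypothetical invocation, assuming an NDK project directory containing a valid Android.mk (the paths are illustrative only); leaving `num_processes` at 0 sizes `-j` from the affinity mask:

build_project('./app/src/main/jni')                    # -j defaults to available CPUs
build_project('./app/src/main/jni', num_processes=4)   # explicit job count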
if __name__ == "__main__": start_time = time.time() parser = argparse.ArgumentParser(description="Tool to turn raw data into videos.") parser.add_argument("input", metavar="input_file", type=str, help="The file to be turned into a video file.") parser.add_argument("output", metavar="output_file", type=str, help="The location of the resulting video file.") parser.add_argument("--fps", metavar="fps", type=float, help="FPS of the video, keep it very low (between 0.25-6) to avoid compression artifacts. Use 6 if you want to upload the file to YouTube.", default=1.0) parser.add_argument("--width", metavar="width", type=int, help="Width of the video.", default=3840) parser.add_argument("--height", metavar="height", type=int, help="Height of the video.", default=2160) parser.add_argument("--video_codec", metavar="video_codec", type=str, help="Tells ffmpeg which video encoder to use.", default="libx264") parser.add_argument("--crf", metavar="crf", type=int, help="Quality of the video (constant rate factor). *Lower* values will increase quality (therefore less compression artifacts) and file size. Might not work with every video codec.", default=24) parser.add_argument("--pixel_size", metavar="pixel_size", type=int, help="The size each pixel is supposed to be. Larger sizes will create much larger video files but might be more resilient against compression.", default=1) parser.add_argument("--color_palette", metavar="color_palette", type=int, help="The color palette size to use. The smaller the size, less colors make the video more resilient against compression but also make the video bigger and encoding/decoding slower.", default=2) parser.add_argument("--ecc_bytes",metavar="ecc_bytes", type=int, help="Determines the amount of Error Correction Code (ECC) bytes that will be used in a 128 bytes segment. More bytes = slightly bigger file size, slightly longer en/decoding time, more resilience against compression.", default=12) parser.add_argument("--threads", metavar="threads", type=int, help="Amount of threads to use for encoding. Defaults to all available cores.", default=len(os.sched_getaffinity(0))) print("IMPORTANT: THIS TOOL COMES WITH NO WARRANTY WHATSOEVER. USE AT YOUR OWN RISK.") args = parser.parse_args() if args.width * args.height < 1024: raise ValueError("The video must have atleast 1024 pixels per frame!") if args.threads < 1: raise ValueError("Threads must be atleast 1!") if args.ecc_bytes < 1: raise ValueError("There must be atleast 1 ECC byte per 128 bytes!") if args.ecc_bytes > 127:
def save_graphs_multiprocess(plots, provinces_name, provinces_abbr): total_saves = len(plots) * len(provinces_name) path = pathlib.Path(__file__).parent fr = list() with concurrent.futures.ProcessPoolExecutor( max_workers=max(1, len(os.sched_getaffinity(0)) - 1) ) as executor: # to avoid making the system completely unusable, I have set it to nproc -1 for k in plots: for i, (p_name, p_abbr) in enumerate(zip(provinces_name, provinces_abbr)): sys.stdout.flush() if k == "infect": fr.append( executor.submit( plots[k][p_abbr].save_plot, f'Covid new infections per day in {p_name} {p_abbr}', 'Day', 'New infections', path / 'Covid')) elif k == "infect_n": fr.append( executor.submit( plots[k][p_abbr].save_plot, f'Covid new infections per day in {p_name} {p_abbr} normalized', 'Day', 'New infections', path / 'Covid_n')) elif k == "infects per tests": fr.append( executor.submit( plots[k][p_abbr].save_plot, f'Covid infections per tests in {p_name} {p_abbr}', 'Day', '% new infections/tests', path / 'Covid_infection_per_test_est')) elif k == "infects per tests_n": fr.append( executor.submit( plots[k][p_abbr].save_plot, f'Covid infections per tests in {p_name} {p_abbr} normalized', 'Day', '% new infections/tests', path / 'Covid_infection_per_test_est_n')) elif k == "tests": fr.append( executor.submit( plots[k][p_abbr].save_plot, f'Covid estimated tests per day in {p_name} {p_abbr}', 'Day', 'Tests', path / 'Covid_Tests_est')) elif k == "tests_n": fr.append( executor.submit( plots[k][p_abbr].save_plot, f'Covid estimated tests per day in {p_name} {p_abbr} normalized', 'Day', 'Tests', path / 'Covid_Tests_est_n')) j = 0 sys.stdout.write(f"\r0% done") sys.stdout.flush() for future in concurrent.futures.as_completed(fr): try: future.result() finally: j += 1 sys.stdout.write(f"\r{int(j / total_saves * 100)}% done") sys.stdout.flush()
def get_cpu_count():
    return len(os.sched_getaffinity(0))
def train(env, max_iters, num_episodes, horizon, iw_norm, bound, delta, gamma, seed, policy, max_offline_iters, aggregate, adaptive_batch, njobs=1): # Create the environment def make_env(): env_gym = gym.make(env).unwrapped return env_gym # Create the policy if policy == 'linear': hid_layers = [] elif policy == 'nn': hid_layers = [100, 50, 25] if aggregate == 'none': learner = pbpois PolicyClass = PeMlpPolicy elif aggregate == 'neuron': learner = nbpois PolicyClass = MultiPeMlpPolicy else: print("Unknown aggregation method, defaulting to none") learner = pbpois PolicyClass = PeMlpPolicy make_policy = lambda name, observation_space, action_space: PolicyClass( name, observation_space, action_space, hid_layers, use_bias=True, seed=seed) sampler = ParallelSampler(make_env, make_policy, gamma, horizon, np.ravel, num_episodes, njobs, seed) try: affinity = len(os.sched_getaffinity(0)) except: affinity = njobs sess = U.make_session(affinity) sess.__enter__() set_global_seeds(seed) gym.logger.setLevel(logging.WARN) learner.learn(make_env, make_policy, sampler, gamma=gamma, n_episodes=num_episodes, horizon=horizon, max_iters=max_iters, verbose=1, feature_fun=np.ravel, iw_norm=iw_norm, bound=bound, max_offline_iters=max_offline_iters, delta=delta, center_return=False, line_search_type='parabola', adaptive_batch=adaptive_batch) sampler.close()
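The bare `except:` around the affinity lookup above also hides errors unrelated to a missing `sched_getaffinity`; a narrower variant (a sketch, not the project's actual code) would only fall back when the platform lacks the call:

try:
    affinity = len(os.sched_getaffinity(0))
except AttributeError:
    # Platform (e.g. macOS, Windows) does not expose CPU affinity;
    # fall back to the requested number of parallel jobs.
    affinity = njobs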
def cpu_executor(self):
    return ThreadPoolExecutor(max_workers=len(sched_getaffinity(0)))
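A minimal usage sketch, assuming `cpu_executor` is invoked as a plain method on its owning object; `obj`, `task`, and `items` are illustrative names, not from the original:

with obj.cpu_executor() as executor:
    futures = [executor.submit(task, item) for item in items]
    results = [f.result() for f in futures]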