def parallelize_func(iterable, func, chunksz=1, n_jobs=16, *args, **kwargs):
    """Parallelize a function over each element of an iterable."""
    chunker = func
    chunks = more_itertools.chunked(iterable, chunksz)
    chunks_results = Parallel(n_jobs=n_jobs, verbose=50)(
        delayed(chunker)(chunk, *args, **kwargs) for chunk in chunks)
    results = more_itertools.flatten(chunks_results)
    return list(results)

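# A minimal usage sketch (not from the original source), assuming the same imports as
# above (joblib's Parallel/delayed and more_itertools). `square_chunk` is a hypothetical
# chunk-level helper: parallelize_func hands each chunk to it and flattens the per-chunk
# results back into a single list.
def square_chunk(chunk):
    return [x * x for x in chunk]

squares = parallelize_func(range(10), square_chunk, chunksz=2, n_jobs=2)
# squares == [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
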
def main():
    graphs = pickle.load(open('pickle/graphs', 'rb'))
    client = MongoClient()
    db = client['github']
    pageranks = db['pageranks']
    pagerank_maps = list(flatten(map(gen_pagerank_maps, graphs)))
    pickle.dump(graphs, open('pickle/graphs-pageranks', 'wb'), 2)
    pageranks.insert(pagerank_maps)

def main(args):
    # get the arguments
    method = args.method
    win_size = args.win_size
    step = args.step
    metric_name = args.metric_name
    n_jobs = args.workers

    # Load the data.
    L, H, olddf, newdf = pickle.load(open(args.filename))
    words = pd.Series(olddf.word.values.ravel()).unique()
    oldrows = []
    newrows = []
    sourcexrange = np.arange(args.mint, args.maxt, step)
    destxrange = np.arange(args.mint, args.maxt, step)
    if method == 'win':
        sourcexrange = sourcexrange[win_size:]
        destxrange = destxrange[:-win_size]

    if args.interpolate:
        sourcexinter = np.arange(sourcexrange[0], sourcexrange[-1] + 1, 1)
        destxinter = np.arange(destxrange[0], destxrange[-1] + 1, 1)
    else:
        sourcexinter = sourcexrange
        destxinter = destxrange

    # Construct the series
    assert len(sourcexinter) == len(destxinter)
    # chunked() needs an integer chunk size, so round up and cast
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))
    words_chunks = more_itertools.chunked(words, chunk_sz)
    timeseries_chunks = Parallel(n_jobs=n_jobs, verbose=20)(
        delayed(process_chunk)(chunk, create_word_time_series, olddf, newdf,
                               sourcexinter, destxinter,
                               metric_name=metric_name,
                               interpolate=args.interpolate)
        for chunk in words_chunks)

    timeseries = list(more_itertools.flatten(timeseries_chunks))

    # Dump the data frame
    for orow, newrow in timeseries:
        if orow and newrow:
            oldrows.append(orow)
            newrows.append(newrow)

    oldtimeseries = pd.DataFrame()
    newtimeseries = pd.DataFrame()
    header = ['word']
    header.extend(sourcexinter)
    newheader = ['word']
    newheader.extend(destxinter)
    oldtimeseries = oldtimeseries.from_records(oldrows, columns=header)
    oldtimeseries = oldtimeseries.fillna(method='backfill', axis=1)
    newtimeseries = newtimeseries.from_records(newrows, columns=newheader)
    newtimeseries = newtimeseries.fillna(method='backfill', axis=1)
    oldtimeseries.to_csv(args.sourcetimef, encoding='utf-8')
    newtimeseries.to_csv(args.endtimef, encoding='utf-8')

def initialize(docs, *args, **qsargs):
    state = {
        'num_topics': None,  # K
        'ss': {
            'document_topic': None,  # n_{m,k}
            'topic_term': None,  # n_{k,t}
            'topic': None,  # n_k
            'doc': None,  # n_m
        },
        'doc_word_topic_assignment': None,  # z_{m,n}
        'docs': None,
        'num_docs': None,
        'used_topics': None,  # U1
        'tau': None,  # mean of the 2nd level DP / sample from first level DP
        'vocabulary': None,
        'alpha': None,  # Concentration parameter for second level DP (providing
                        # distribution over topics (term distributions) that will be drawn for each doc)
        'beta': None,   # Parameter of root Dirichlet distribution (over terms)
        'gamma': None,  # Concentration parameter for root DP (from which a finite number
                        # of topic/term distributions will be drawn)
        'topic_term_distribution': None,  # Phi
        'document_topic_distribution': None,  # Theta
    }
    state['num_topics'] = 4
    state['docs'] = vectorize(docs, *args, **qsargs)
    state['vocabulary'] = set(more_itertools.flatten(state['docs']))
    state['num_docs'] = len(state['docs'])
    state['num_terms'] = len(state['vocabulary'])
    state['doc_word_topic_assignment'] = defaultdict(lambda: defaultdict(int))
    state['ss']['document_topic'] = defaultdict(lambda: defaultdict(int))
    state['ss']['topic_term'] = defaultdict(lambda: defaultdict(int))
    state['ss']['topic'] = defaultdict(int)
    state['ss']['doc'] = defaultdict(int)
    state['used_topics'] = set(range(state['num_topics']))

    for doc_index, doc in enumerate(state['docs']):
        for word_index, term in enumerate(doc):
            probabilities = state['num_topics'] * [1. / state['num_topics']]
            topic = choice(list(state['used_topics']), p=probabilities)
            assert topic != DUMMY_TOPIC
            state['doc_word_topic_assignment'][doc_index][word_index] = topic
            state['ss']['document_topic'][doc_index][topic] += 1
            state['ss']['topic_term'][topic][term] += 1
            state['ss']['topic'][topic] += 1
            state['ss']['doc'][doc_index] += 1

    state['tau'] = {s: (1. / state['num_topics']) for s in state['used_topics']}
    # dict.values() is a view in Python 3; materialize it before popping a value
    state['tau'][DUMMY_TOPIC] = list(state['tau'].values()).pop()
    state['alpha'], state['beta'], state['gamma'] = 1, 1, 1

    topics = set(state['used_topics'])
    for topic in topics:
        state = cleanup_topic(state, topic)
    state = sample_tau(state)
    return state

def getfixtureinfo(self, node, func, cls, funcargs=True):
    if funcargs and not getattr(node, "nofuncargs", False):
        argnames = getfuncargnames(func, cls=cls)
    else:
        argnames = ()

    usefixtures = flatten(
        mark.args for mark in node.iter_markers(name="usefixtures")
    )
    initialnames = argnames
    initialnames = tuple(usefixtures) + initialnames
    fm = node.session._fixturemanager
    names_closure, arg2fixturedefs = fm.getfixtureclosure(initialnames, node)
    return FuncFixtureInfo(argnames, names_closure, arg2fixturedefs)

def scrape_candidates(self, product_name, archive_directory, major_version):
    """Scrape the candidates/ directory for beta, release candidate, and final releases."""
    url_path = '/pub/%s/candidates/' % archive_directory

    # First, let's look at /pub/PRODUCT/releases/ so we know what final
    # builds have been released
    release_path = '/pub/%s/releases/' % archive_directory
    release_path_content = self.download(release_path)

    # Get the final release version numbers, so something like "64.0b8/" -> "64.0b8"
    final_releases = [
        link['text'].rstrip('/')
        for link in self.get_links(release_path_content)
        if link['text'][0].isdigit()
    ]

    content = self.download(url_path)
    version_links = [
        link for link in self.get_links(content)
        if link['text'][0].isdigit()
    ]

    # If we've got a major_version, then we only want to scrape data for versions
    # greater than (major_version - 4) and esr builds
    if major_version:
        major_version_minus_4 = major_version - 4
        logger.info('Skipping anything before %s and not esr', major_version_minus_4)
        version_links = [
            link for link in version_links
            if (
                # "63.0b7-candidates/" -> 63
                int(link['text'].split('.')[0]) >= major_version_minus_4
                or 'esr' in link['text']
            )
        ]

    scrape = partial(
        self.scrape_candidate_version,
        product_name=product_name,
        final_releases=final_releases
    )

    if self.num_workers == 1:
        build_data = map(scrape, version_links)
    else:
        with concurrent.futures.ProcessPoolExecutor(max_workers=self.num_workers) as executor:
            build_data = executor.map(scrape, version_links, timeout=300)

    # build_data is a list of lists so we flatten that
    return list(more_itertools.flatten(build_data))

def __init__(self, pattern):
    self.pattern = pattern
    self.keys = self.keyMatcher.findall(pattern)
    self.keySet = OrderedSet(self.keys) - {'sep'}
    self.optMatches = self.optMatcher.findall(self.pattern)
    optKeys = mit.flatten(map(self.keyMatcher.findall, self.optMatches))
    self.optKeySet = OrderedSet(optKeys) - {'sep'}
    self.optParts = dict(zip(self.optKeySet, self.optMatches))
    self.reqKeySet = self.keySet - self.optKeySet
    self.has_opt = bool(len(self.optKeySet))
    if self.has_opt:
        self.base = pattern[:pattern.index('<')]
    else:
        self.base = self.pattern

def _linearize(
        self,
        dependency_tree: DependencyTree,
        head_node: DependencyTreeToken) -> Iterable[DependencyTreeToken]:
    """
    We recursively linearize the tree by linearizing all the sub-trees rooted by its
    modifiers and then ordering them relative to the head and each other.
    """
    nodes_to_order = list(dependency_tree.modifiers(head_node))
    # we need to consider the head node alongside its dependents because the head
    # will in general be positioned in the midst of them. We use the pseudo-dependency
    # HEAD to distinguish it.
    nodes_to_order.append((head_node, HEAD))

    if len(nodes_to_order) == 1:
        # the head has no modifiers, so there is nothing to order
        return (head_node,)

    role_order = self._head_pos_to_role_order[head_node.part_of_speech]

    def position(node: Tuple[DependencyTreeToken, DependencyRole]) -> int:
        role = node[1]
        try:
            return role_order.index(role)
        except ValueError:
            raise RuntimeError(
                f"Do not know how to order modifiers with role "
                f"{role} relative to head of POS tag "
                f"{head_node.part_of_speech}. We know how to handle the "
                f"following roles: {role_order}")

    nodes_in_order = sorted(nodes_to_order, key=position)

    return flatten(
        self._linearize(dependency_tree, node)
        # don't recurse infinitely by trying to process the head word again
        if dependency != HEAD
        else (node,)
        for (node, dependency) in nodes_in_order)

def find_similar_values_in_database(self, potential_values):
    matching_values = set()

    table_text_column_mapping = self._get_text_columns(self.database_schema)

    conn = sqlite3.connect(str(self.database_path.resolve()))
    cursor = conn.cursor()

    for table, columns in table_text_column_mapping.items():
        if columns:
            query = self._assemble_query(columns, table)
            data = self.fetch_data(query, cursor)

            # The overhead of parallelization only helps after a certain size of data.
            # Example: a table with ~300k entries and 4 columns takes ~20s on a single
            # core; using all 12 virtual cores gets that down to ~12s. But if the table
            # has only 60k entries and 4 columns, the parallelization overhead is larger
            # than computing everything on a single core (~3.8s vs. ~4.1s).
            if len(data) > 80000:
                matches = Parallel(n_jobs=NUM_CORES)(
                    delayed(self._find_matches_in_column)(
                        table, column, column_idx, data, potential_values)
                    for column_idx, column in enumerate(columns))
                print(f'Parallelization activated as table has {len(data)} rows.')
            else:
                matches = [
                    self._find_matches_in_column(table, column, column_idx, data, potential_values)
                    for column_idx, column in enumerate(columns)
                ]

            matching_values.update(flatten(matches))

    conn.close()

    return self._top_n_results(matching_values)

def main(input, part):
    # Iterator of lines
    lines = map(lambda x: x.strip(), input.readlines())
    # Iterator of key-value pair strings
    entries = flatten(map(lambda x: x.split(" "), lines))
    # Iterator of key-value pair tuples
    entries = map(lambda x: x.split(":"), entries)
    # Iterator of lists of key-value pairs (split on empty string)
    blocks = split_at(entries, lambda x: x == [""])
    # Iterator of dicts
    dicts = map(dict, blocks)

    # Start applying filters, and print length
    dicts = filter(filter_required_keys, dicts)
    if part == "2":
        dicts = filter(filter_birth_year, dicts)
        dicts = filter(filter_issue_year, dicts)
        dicts = filter(filter_expire_year, dicts)
        dicts = filter(filter_height, dicts)
        dicts = filter(filter_hair_color, dicts)
        dicts = filter(filter_eye_color, dicts)
        dicts = filter(filter_passport_id, dicts)
    print(ilen(dicts))

def shap_sums(self):
    # TODO: rewrite
    shap_positive_sums = pd.DataFrame(np.vstack([
        np.sum(more_or_value(v, 0.0, 0.0), axis=0) for v in self.shap_values
    ]).T, index=self.partitions.X_T.index)
    shap_positive_sums = shap_positive_sums.rename(
        columns={c: f"plus_shap_{c}" for c in shap_positive_sums.columns})
    shap_negative_sums = pd.DataFrame(np.vstack([
        np.sum(less_or_value(v, 0.0, 0.0), axis=0) for v in self.shap_values
    ]).T, index=self.partitions.X_T.index)
    shap_negative_sums = shap_negative_sums.rename(
        columns={c: f"minus_shap_{c}" for c in shap_negative_sums.columns})
    sh_cols = [c for c in flatten(zip(shap_positive_sums, shap_negative_sums))]
    shap_sums = shap_positive_sums.join(shap_negative_sums)[sh_cols]
    return shap_sums

def __init__(self, input_space: gym.spaces.Box, dims: Sequence[int]):
    super().__init__()
    checkraise(
        isinstance(input_space, gym.spaces.Box) and len(input_space.shape) == 1,
        TypeError,
        'input_space must be Box',
    )
    checkraise(
        len(dims) > 0,
        ValueError,
        'dims must be non-empty',
    )

    (input_dim,) = input_space.shape
    self.dims = list(itt.chain([input_dim], dims))

    modules = mitt.flatten(
        (make_module('linear', 'relu', in_dim, out_dim), nn.ReLU())
        for in_dim, out_dim in mitt.pairwise(self.dims)
    )
    self.model = nn.Sequential(*modules)

def convert_1d_array(arrays: List[object]):
    """
    Usage::

        >>> convert_1d_array([1,2,3, [[1,1,23],2,3]])
        [1, 2, 3, 1, 1, 23, 2, 3]
    """
    import more_itertools as itr
    arrays = copy.deepcopy(arrays)
    for i, x in enumerate(arrays):
        if not (isinstance(x, list) or isinstance(x, tuple)):
            arrays[i] = [x]
    arrays = list(itr.flatten(arrays))
    i = 0
    if len(arrays) > 0:
        while True:
            if isinstance(arrays[i], list) or isinstance(arrays[i], tuple):
                arrays = convert_1d_array(arrays)
                i = 0
            else:
                i += 1
            if len(arrays) == i:
                break
    return arrays

def make_main_model_data(dataset_names):
    subsets_name = '_'.join(sorted(dataset_names))

    data = list(flatten(load_dataset(d) for d in dataset_names))

    data_to_train_discourse_plan_ranker = [
        t for t in data
        if len(t.triples) > 1 and t.entity_map
    ]

    data = make_data(data_to_train_discourse_plan_ranker)

    extractors = {}

    for k, v in data.items():
        X_raw = [x[0] for x in v]
        y = [x[1] for x in v]

        ef = SentenceAggregationFeatures().fit(X_raw, y)
        X = ef.transform(X_raw)

        data = np.c_[np.array(X), y]
        data = np.unique(data, axis=0)

        sa_data_filename = f'sa_data_{subsets_name}_{k}'
        sa_data_filepath = os.path.join(PRETRAINED_DIR, sa_data_filename)

        np.save(sa_data_filepath, data)

        extractors[k] = ef

    sa_extractor_filename = f'sa_extractor_{subsets_name}'
    sa_extractor_filepath = os.path.join(PRETRAINED_DIR, sa_extractor_filename)

    with open(sa_extractor_filepath, 'wb') as f:
        pickle.dump(extractors, f)

def _make_far_training(
    num_samples: Optional[int],
    noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
) -> Phase1InstanceGroup:
    figure_0 = standard_object("ball", BALL)
    figure_1 = standard_object("book", BOOK)
    figure_2 = standard_object("dad", DAD)
    ground_0 = standard_object("cookie", COOKIE)
    ground_1 = standard_object("table", TABLE)
    ground_2 = standard_object(
        "person", PERSON, banned_properties=[IS_SPEAKER, IS_ADDRESSEE]
    )

    figures = immutableset([figure_0, figure_1, figure_2])
    grounds = immutableset([ground_0, ground_1, ground_2])

    return phase1_instances(
        "Preposition Training Far",
        chain(*[
            flatten([
                sampled(
                    _far_template(
                        figure,
                        ground,
                        make_noise_objects(noise_objects),
                        is_training=True,
                    ),
                    ontology=GAILA_PHASE_1_ONTOLOGY,
                    chooser=PHASE1_CHOOSER_FACTORY(),
                    max_to_sample=num_samples if num_samples else 5,
                )
                for figure in figures
                for ground in grounds
            ])
        ]),
        language_generator=language_generator,
    )

def _make_beside_training(
    num_samples: Optional[int],
    noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
) -> Phase1InstanceGroup:
    figure_0 = standard_object("ball", BALL)
    figure_1 = standard_object("book", BOOK)
    figure_2 = standard_object("mom", MOM)
    ground_0 = standard_object("cookie", COOKIE)
    ground_1 = standard_object("table", TABLE)
    ground_2 = standard_object("dad", DAD)

    figures = immutableset([figure_0, figure_1, figure_2])
    grounds = immutableset([ground_0, ground_1, ground_2])

    return phase1_instances(
        "Preposition Training Beside",
        chain(*[
            flatten([
                sampled(
                    _beside_template(
                        figure,
                        ground,
                        make_noise_objects(noise_objects),
                        is_right=True,
                        is_training=True,
                    ),
                    ontology=GAILA_PHASE_1_ONTOLOGY,
                    chooser=PHASE1_CHOOSER_FACTORY(),
                    max_to_sample=num_samples if num_samples else 5,
                )
                for figure in figures
                for ground in grounds
                # for direction in BOOL_SET
            ])
        ]),
        language_generator=language_generator,
    )

def solve_part2():
    ingredients = [line.split(" (contains ")[0].split() for line in lines]
    allergens = [line.split(" (contains ")[1][:-1].split(", ") for line in lines]
    foods = list(zip(ingredients, allergens))
    possible_allergens = sorted(set(list(flatten(allergens))))

    possible_ingredients = []
    for allergen in possible_allergens:
        pi = []
        for ingres, algs in foods:
            if allergen in algs:
                pi.append(set(ingres))
        possible_ingredients.append((allergen, pi))

    common_possible_ingredients = [
        (ps[0], set.intersection(*ps[1])) for ps in possible_ingredients]

    copied_common_possible_ingredients = deepcopy(common_possible_ingredients)
    result_dict = defaultdict(str)

    idx = 0
    while idx < 5:
        for i, c in enumerate(common_possible_ingredients):
            if len(c[1]) == 0:
                continue
            if len(c[1]) == 1:
                result_dict[c[0]] = copied_common_possible_ingredients[i][1].pop()
            else:
                for val in c[1]:
                    if val in result_dict.values():
                        copied_common_possible_ingredients[i][1].discard(val)
                if len(copied_common_possible_ingredients[i][1]) == 1:
                    result_dict[c[0]] = copied_common_possible_ingredients[i][1].pop()
        common_possible_ingredients = deepcopy(copied_common_possible_ingredients)
        idx += 1

    return ",".join([i[1] for i in sorted(result_dict.items())])

def make_jump_imprecise_temporal_descriptions(
    num_samples: Optional[int],
    noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
) -> Phase1InstanceGroup:
    jumper = standard_object(
        "jumper_0",
        THING,
        required_properties=[CAN_JUMP],
        banned_properties=[IS_SPEAKER, IS_ADDRESSEE],
    )
    background = make_noise_objects(noise_objects)

    return phase1_instances(
        "jumping",
        chain(
            flatten([
                sampled(
                    # "A person jumps"
                    make_jump_template(
                        jumper,
                        use_adverbial_path_modifier=use_adverbial_path_modifier,
                        spatial_properties=[FAST] if is_fast else [SLOW],
                        background=background,
                    ),
                    ontology=GAILA_PHASE_1_ONTOLOGY,
                    chooser=PHASE1_CHOOSER_FACTORY(),
                    max_to_sample=num_samples if num_samples else 5,
                )
                for use_adverbial_path_modifier in (True, False)
                for is_fast in BOOL_SET
            ])),
        language_generator=language_generator,
    )

def make_walk_run_subtle_verb_distinction(
    num_samples: Optional[int],
    noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
) -> Phase1InstanceGroup:
    agent = standard_object(
        "walker_0",
        THING,
        required_properties=[ANIMATE],
        banned_properties=[IS_SPEAKER, IS_ADDRESSEE],
    )
    background = make_noise_objects(noise_objects)

    return phase1_instances(
        "walking-running",
        chain(
            flatten([
                sampled(
                    make_walk_run_template(
                        agent,
                        use_adverbial_path_modifier=use_adverbial_path_modifier,
                        operator=operator,
                        spatial_properties=[HARD_FORCE] if hard_force else [SOFT_FORCE],
                        background=background,
                    ),
                    ontology=GAILA_PHASE_1_ONTOLOGY,
                    chooser=PHASE1_CHOOSER_FACTORY(),
                    max_to_sample=num_samples if num_samples else 5,
                )
                for use_adverbial_path_modifier in BOOL_SET
                for hard_force in BOOL_SET
                for operator in [AWAY_FROM, TOWARD]
            ])),
        language_generator=language_generator,
    )

async def run(
    self, subtree_uuid: str, delete_functions: bool, keep_functions: List[str] = []
):
    org_uuid = await self.get_org_uuid()
    tree = await self.get_tree(org_uuid)
    subtree = self.find_subtree(subtree_uuid, tree)

    print("Deleting subtree for {}".format(subtree_uuid))
    unit_uuids = self.get_tree_uuids(subtree)
    await self.delete_from_lora(unit_uuids, "organisation/organisationenhed")

    if delete_functions:
        print("Deleting associated org functions for subtree {}".format(subtree_uuid))
        funktionsnavne = []
        if keep_functions:
            funktionsnavne = [f for f in all_functionnames if f not in keep_functions]
        org_func_uuids = await self.get_associated_org_funcs(
            unit_uuids, funktionsnavne=funktionsnavne
        )
        await self.delete_from_lora(
            flatten(org_func_uuids), "organisation/organisationfunktion"
        )

    print("Done")

def create_split(df, study_ids, config):
    x = []
    gt = []
    for study_id in tqdm(study_ids):
        # Get slices for current study_id
        study_df = df[df.study_id == study_id].sort_values('slice_num')
        study_preds = study_df[config.pred_columns].to_numpy()
        study_gt = study_df[config.gt_columns].to_numpy()

        study_preds = np.pad(
            study_preds,
            ((config.num_slices // 2, config.num_slices // 2), (0, 0)))
        new_indices = list(
            flatten(windowed(range(study_preds.shape[0]), config.num_slices)))
        study_x = study_preds[new_indices].reshape(study_gt.shape[0],
                                                   config.predictions_in)

        if config.append_area_feature:
            study_areas = study_df['area'].to_numpy()
            study_areas = np.pad(
                study_areas,
                ((config.num_slices // 2, config.num_slices // 2), ))
            study_areas = study_areas[new_indices].reshape(
                study_gt.shape[0], config.num_slices)
            study_x = np.concatenate((study_x, study_areas), axis=1)

        x.append(study_x)
        gt.append(study_gt)

    x = np.concatenate(x)
    gt = np.concatenate(gt)
    return x, gt

def fit(self, X, y=None):
    self.freq_parts = Counter()
    self.freq_partitions = Counter()

    for agg in X:
        a_agg = abstract_triples(flatten(agg))
        self.freq_partitions[a_agg] += 1

        for agg_part in agg:
            self.freq_parts[abstract_triples(agg_part)] += 1

    self.total_parts = sum(self.freq_parts.values())
    self.total_partitions = sum(self.freq_partitions.values())

    self.feature_names_ = [
        'pct_partition',
        'pct_longest_partition',
        'freq_parts',
        'freq_partition'
    ]

    return self

def get_e_address(e_uuid, scope, lc, lc_historic, settings):
    # Iterator of all addresses in LoRa
    lora_addresses = lc_historic.addresses.values()
    lora_addresses = flatten(lora_addresses)
    # Iterator of all addresses for the current user
    lora_addresses = filter(lambda address: address['user'] == e_uuid, lora_addresses)
    # Iterator of all addresses for current user and correct scope
    lora_addresses = filter(lambda address: address['scope'] == scope, lora_addresses)

    candidates = lora_addresses

    if scope == "Telefon":
        priority_list = settings.get("plan2learn.phone.priority", [])
    elif scope == "E-mail":
        priority_list = settings.get("plan2learn.email.priority", [])
    else:
        priority_list = []

    address = lc_choose_public_address(candidates, priority_list, lc)

    if address is not None:
        return address
    else:
        return {}  # like mora_helpers

def generate_cycle(grid: set):
    neighbors = set(flatten(map(generate_neighbor_coordinates, grid)))
    all_coordinates = grid.union(neighbors)
    filter_fn = partial(filter_survivor, grid=grid)
    return set(filter(filter_fn, all_coordinates))

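# A hedged usage sketch (not from the original source): assuming `grid` is a set of
# active-cell coordinate tuples and that generate_neighbor_coordinates / filter_survivor
# implement the usual Conway-style neighborhood and survival rules, the simulation is
# advanced by feeding each cycle's result back in.
def run_cycles(grid: set, n: int) -> set:
    for _ in range(n):
        grid = generate_cycle(grid)
    return grid

# e.g. active_after_six = len(run_cycles(initial_grid, 6))
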
def test_train_classification_model(test_output_dirs: TestOutputDirectories,
                                    use_mixed_precision: bool) -> None:
    """
    Test training and testing of classification models, asserting on the individual results
    from training and testing.
    Expected test results are stored for GPU with and without mixed precision.
    """
    logging_to_stdout(logging.DEBUG)
    config = ClassificationModelForTesting()
    config.set_output_to(test_output_dirs.root_dir)
    # Train for 4 epochs, checkpoints at epochs 2 and 4
    config.num_epochs = 4
    config.use_mixed_precision = use_mixed_precision
    config.save_start_epoch = 2
    config.save_step_epochs = 2
    config.test_start_epoch = 2
    config.test_step_epochs = 2
    config.test_diff_epochs = 2
    expected_epochs = [2, 4]
    assert config.get_test_epochs() == expected_epochs
    model_training_result = model_training.model_train(config)
    assert model_training_result is not None
    expected_learning_rates = [0.0001, 9.99971e-05, 9.99930e-05, 9.99861e-05]
    use_mixed_precision_and_gpu = use_mixed_precision and machine_has_gpu
    if use_mixed_precision_and_gpu:
        expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
        expected_val_loss = [0.737039, 0.736721, 0.736339, 0.735957]
    else:
        expected_train_loss = [0.686614, 0.686465, 0.686316, 0.686167]
        expected_val_loss = [0.737061, 0.736690, 0.736321, 0.735952]

    def extract_loss(results: List[MetricsDict]) -> List[float]:
        return [d.values()[MetricType.LOSS.value][0] for d in results]

    actual_train_loss = extract_loss(model_training_result.train_results_per_epoch)
    actual_val_loss = extract_loss(model_training_result.val_results_per_epoch)
    actual_learning_rates = list(flatten(model_training_result.learning_rates_per_epoch))
    assert actual_train_loss == pytest.approx(expected_train_loss, abs=1e-6)
    assert actual_val_loss == pytest.approx(expected_val_loss, abs=1e-6)
    assert actual_learning_rates == pytest.approx(expected_learning_rates, rel=1e-5)
    test_results = model_testing.model_test(config, ModelExecutionMode.TRAIN)
    assert isinstance(test_results, InferenceMetricsForClassification)
    assert list(test_results.epochs.keys()) == expected_epochs
    if use_mixed_precision_and_gpu:
        expected_metrics = {
            2: [0.635942, 0.736691],
            4: [0.636085, 0.735952],
        }
    else:
        expected_metrics = {
            2: [0.635941, 0.736690],
            4: [0.636084, 0.735952],
        }
    for epoch in expected_epochs:
        assert test_results.epochs[epoch].values()[MetricType.CROSS_ENTROPY.value] == \
               pytest.approx(expected_metrics[epoch], abs=1e-6)
    # Run detailed logs file check only on CPU, it will contain slightly different metrics
    # on GPU, but here we want to mostly assert that the files look reasonable
    if not machine_has_gpu:
        # Check log EPOCH_METRICS_FILE_NAME
        epoch_metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / EPOCH_METRICS_FILE_NAME
        # Auto-format will break the long header line, hence the strange way of writing it!
        expected_epoch_metrics = \
            "loss,cross_entropy,accuracy_at_threshold_05,seconds_per_batch,seconds_per_epoch,learning_rate," + \
            "area_under_roc_curve,area_under_pr_curve,accuracy_at_optimal_threshold," \
            "false_positive_rate_at_optimal_threshold,false_negative_rate_at_optimal_threshold," \
            "optimal_threshold,subject_count,epoch,cross_validation_split_index\n" + \
            """0.6866141557693481,0.6866141557693481,0.5,0,0,0.0001,1.0,1.0,0.5,0.0,0.0,0.529514,2.0,1,-1
0.6864652633666992,0.6864652633666992,0.5,0,0,9.999712322065557e-05,1.0,1.0,0.5,0.0,0.0,0.529475,2.0,2,-1
0.6863163113594055,0.6863162517547607,0.5,0,0,9.999306876841536e-05,1.0,1.0,0.5,0.0,0.0,0.529437,2.0,3,-1
0.6861673593521118,0.6861673593521118,0.5,0,0,9.998613801725043e-05,1.0,1.0,0.5,0.0,0.0,0.529399,2.0,4,-1
"""
        check_log_file(epoch_metrics_path, expected_epoch_metrics,
                       ignore_columns=[LoggingColumns.SecondsPerBatch.value,
                                       LoggingColumns.SecondsPerEpoch.value])
        # Check log METRICS_FILE_NAME
        metrics_path = config.outputs_folder / ModelExecutionMode.TRAIN.value / METRICS_FILE_NAME
        metrics_expected = \
            """prediction_target,epoch,subject,model_output,label,cross_validation_split_index,data_split
Default,1,S4,0.5216594338417053,0.0,-1,Train
Default,1,S2,0.5295137763023376,1.0,-1,Train
Default,2,S4,0.5214819312095642,0.0,-1,Train
Default,2,S2,0.5294750332832336,1.0,-1,Train
Default,3,S4,0.5213046073913574,0.0,-1,Train
Default,3,S2,0.5294366478919983,1.0,-1,Train
Default,4,S4,0.5211275815963745,0.0,-1,Train
Default,4,S2,0.5293986201286316,1.0,-1,Train
"""
        check_log_file(metrics_path, metrics_expected, ignore_columns=[])

def test_pursuit_color_attribute(
    color_node, object_0_node, object_1_node, language_mode, learner
):
    color = property_variable(f"{color_node.handle}", color_node)
    object_0 = standard_object(
        f"{object_0_node.handle}", object_0_node, added_properties=[color]
    )
    object_1 = standard_object(
        f"{object_1_node.handle}", object_1_node, added_properties=[color]
    )

    color_object_template = _object_with_color_template(object_0, None)

    templates_with_n_samples = [
        (color_object_template, 2),
        (_object_with_color_template(object_1, None), 4),
    ]

    language_generator = phase1_language_generator(language_mode)

    color_train_curriculum = phase1_instances(
        f"{color.handle} Color Train",
        language_generator=language_generator,
        situations=chain(*[
            flatten([
                sampled(
                    template,
                    chooser=PHASE1_CHOOSER_FACTORY(),
                    ontology=GAILA_PHASE_1_ONTOLOGY,
                    max_to_sample=n_samples,
                    block_multiple_of_the_same_type=True,
                )
                for template, n_samples in templates_with_n_samples
            ])
        ]),
    )

    color_test_curriculum = phase1_instances(
        f"{color.handle} Color Test",
        situations=sampled(
            color_object_template,
            chooser=PHASE1_TEST_CHOOSER_FACTORY(),
            ontology=GAILA_PHASE_1_ONTOLOGY,
            max_to_sample=1,
            block_multiple_of_the_same_type=True,
        ),
        language_generator=language_generator,
    )

    processing_learner = learner(language_mode)

    for (
        _,
        linguistic_description,
        perceptual_representation,
    ) in color_train_curriculum.instances():
        processing_learner.observe(
            LearningExample(perceptual_representation, linguistic_description)
        )

    for (
        _,
        test_lingustics_description,
        test_perceptual_representation,
    ) in color_test_curriculum.instances():
        descriptions_from_learner = processing_learner.describe(
            test_perceptual_representation
        )
        gold = test_lingustics_description.as_token_sequence()
        assert descriptions_from_learner
        assert gold in [
            desc.as_token_sequence() for desc in descriptions_from_learner
        ]

def undo(carriers: List[Carriers]) -> List[Symbol]:
    return list(flatten([undo_one(carrier) for carrier in carriers]))

def resolve_args(namespaces):
    from pathlib import Path

    # TODO: make so that we can run without science frames
    args, head_info, names = namespaces

    # Positional argument and -s / --sci argument mean the same thing
    if args.files_or_directory and not args.sci:
        args.sci = args.files_or_directory

    if args.outdir:
        # output directory given explicitly
        args.outdir = iocheck(args.outdir, os.path.exists, 1)
    # else:
    #     infer output directory from images provided

    # If input is a directory, process all files in tree!
    # If outdir given, rebuild the tree for reduced files there. Otherwise
    # maintain current tree for reduced files.
    from pySHOC import treeops

    root = Path(args.sci[0])
    if root.is_dir():  # this is a directory; try to process the entire tree!
        _infer_indir = False

        # first check if still has day-by-day folders
        # if next(args.sci[0].glob('[01][0-9][0-3][0-9]'), None):
        #     # try to partition
        #     treeops.partition_by_source(args.sci[0])

        # get file tree
        tree = treeops.get_tree(root, '.fits')
        flats = tree.pop('flats', tree.pop('flat', None))
        bias = tree.pop('bias', None)
        if not args.flats:
            args.flats = flats
        if not args.bias:
            args.bias = bias
        # flatten the tree into list of files
        args.sci = list(mit.flatten(tree.values()))
    else:
        _infer_indir = True

    # Resolve inputs and get the input folder from the resolved file list for
    # sci / flats / bias
    _infer_outdir = not bool(args.outdir)
    work_dir = ''
    for name in ('sci', 'flats', 'bias'):  # args.dark
        images = getattr(args, name)
        if images:
            # Resolve the input images
            images = parse.to_list(images,
                                   os.path.exists,
                                   include='*.fits',
                                   path=work_dir,
                                   abspaths=True,
                                   raise_error=1)
            # put resolved list in arg namespace
            setattr(args, name, images)
            if _infer_indir:
                work_dir = Path(images[0]).parent
                _infer_indir = False

            if _infer_outdir:
                args.outdir = os.path.split(images[0])[0]
                _infer_outdir = False

    # All inputs should now be resolved to lists of file names
    if args.sci:
        # Initialize Run
        args.sci = shocSciRun.load(args.sci, label='science')
        # TODO: use kind and set that as label default?

        # for cube in args.sci:  # DO YOU NEED TO DO THIS IN A LOOP?
        #     cube._needs_flip = not cube.cross_check(args.sci[0], 'flip_state')
        # self-consistency check for flip state of science cubes
        # NOTE: THIS MAY BE INEFFICIENT IF THE FIRST CUBE IS THE ONLY ONE WITH A
        #  DIFFERENT FLIP STATE...

    # ===========================================================================
    if args.gps:
        args.timing = True  # Do timing if gps info given

        if len(args.gps) == 1:
            # triggers given either as single trigger time string or filename of trigger list
            valid_gps = iocheck(args.gps[0], validity.RA, raise_error=-1)
            # if valid single time this will return that same str else None
            if not valid_gps:
                args.gps = parse.to_list(args.gps, validity.RA,
                                         path=work_dir,
                                         abspath=0,
                                         sort=0,
                                         raise_error=1)

        # at this point args.gps is a list of explicit time strings.
        # Check if they are valid representations of time
        args.gps = [iocheck(g, validity.RA, raise_error=1, convert=convert.RA)
                    for g in args.gps]

        # Convert and set as cube attribute
        args.sci.that_need_triggers().set_gps_triggers(args.gps)

        # if any cubes are GPS triggered on each individual frame
        grun = args.sci.that_need_kct()
        if len(args.kct) == 1 and len(grun) != 1:
            warn('A single GPS KCT provided for multiple externally triggered runs. '
                 'Assuming this applies for all these files: %s' % grun)
            args.kct *= len(grun)  # expand by repeating

        elif len(grun) != len(args.kct):
            l = str(len(args.kct)) or 'No'
            s = ': %s' % str(args.kct) if len(args.kct) else ''
            raise ValueError('%s GPS KCT values provided%s for %i file(s): %s'
                             '' % (l, s, len(grun), grun))

        # "Please specify KCT (Exposure time + Dead time):")
        # args.kct = InputCallbackLoop.str(msg, 0.04, check=validity.float, what='KCT')

        for cube, kct in zip(grun, args.kct):
            cube.timing.kct = kct

    # ===========================================================================
    if args.flats or args.bias:

        args.combine = list(map(str.lower, args.combine))
        hows = 'day', 'daily', 'week', 'weekly'
        methods = 'sigma clipped',
        funcs = 'mean', 'median'
        vocab = hows + methods + funcs
        transmap = dict(mit.grouper(hows, 2))
        understood, misunderstood = map(list, mit.partition(vocab.__contains__,
                                                            args.combine))
        if any(misunderstood):
            raise ValueError('Argument(s) {} for combine not understood.'
                             ''.format(misunderstood))
        else:
            understood = [transmap.get(u, u) for u in understood]

            how = next(filter(hows.__contains__, understood))
            func = next(filter(funcs.__contains__, understood))
            meth = next(filter(methods.__contains__, understood), '')

            args.combine = how
            args.fcombine = getattr(np, func)
            print('\nBias/Flat combination will be done by {}.'.format(
                ' '.join([how, meth, func])))

            # TODO: sigma clipping ... even though it sucks

    # ===========================================================================
    if args.flats:
        # TODO full matching here ...
        # args.flats = parse.to_list(args.flats, imaccess, path=work_dir, raise_error=1)
        args.flats = shocFlatFieldRun.load(args.flats, label='flat')

        # isolate the flat fields that match the science frames. only these will be processed
        match = args.flats.cross_check(args.sci, 'binning', 1)
        args.flats = args.flats[match]

        # check which are master flats
        # for flat in args.flats:
        #     flat._needs_flip = not flat.cross_check(args.sci[0], 'flip_state')

        # flag the flats that need to be subframed, based on the science frames which are subframed
        args.flats.flag_sub(args.sci)

        args.flats.print_instrumental_setup()

        # check which of the given flats are potentially master
        # is_master = [f.ndims == 2 for f in args.flats]

    # else:
    #     print('WARNING: No flat fielding will be done!')

    # ===========================================================================
    if args.bias:
        # args.bias = parse.to_list(args.bias, imaccess, path=work_dir, raise_error=1)
        args.bias = shocBiasRun.load(args.bias, label='bias')

        # match the biases for the science run
        match4sci = args.bias.cross_check(args.sci, ['binning', 'mode'], 0)
        # for bias in args.bias:
        #     bias._needs_flip = bias.cross_check(args.sci[0], 'flip_state')
        # NOTE: THIS MAY BE INEFFICIENT IF THE FIRST CUBE IS THE ONLY ONE WITH A
        #  DIFFERENT FLIP STATE...
        # args.bias[match4sci].flag_sub(args.sci) ?
        args.bias.flag_sub(args.sci)
        args.bias[match4sci].print_instrumental_setup(
            description='(for science frames)')

        # match the biases for the flat run
        if args.flats:
            match4flats = args.bias.cross_check(args.flats, ['binning', 'mode'], -1)
            # args.bias4flats = args.bias[match4flats]
            # for bias in args.bias4flats:
            #     bias._needs_flip = bias.cross_check(args.flats[0], 'flip_state')

            # print table of bias frames
            args.bias[match4flats].print_instrumental_setup(
                description='(for flat fields)')
            match = match4sci & match4flats
        else:
            match = match4sci

        args.bias = args.bias[match]

        # check which of the given flats are potentially master
        # is_master = [f.ndims == 2 for f in args.flats]

    # else:
    #     warn('No de-biasing will be done!')

    # ===========================================================================
    if args.split:
        if args.outdir[0]:  # if an output directory is given
            args.outdir = os.path.abspath(args.outdir[0])
            if not os.path.exists(args.outdir):  # if it doesn't exist create it
                print('Creating reduced data directory {}.\n'.format(args.outdir))
                os.mkdir(args.outdir)

    # ===========================================================================
    # Handle header updating here

    # NOTE: somehow, this attribute gets set even though we can never read it due to
    #  a syntax error
    delattr(head_info, 'update-headers')

    hi = head_info
    hi.coords = None
    # join arguments since they are read as lists
    hi.object = ' '.join(hi.object)
    hi.ra = ' '.join(hi.ra)
    hi.dec = ' '.join(hi.dec)
    hi.date = ' '.join(hi.date)

    if args.update_headers:
        if hi.ra and hi.dec:
            iocheck(hi.ra, validity.RA, 1)
            iocheck(hi.dec, validity.DEC, 1)
            hi.coords = SkyCoord(ra=hi.ra, dec=hi.dec, unit=('h', 'deg'))  # , system='icrs'
        else:
            from pySHOC.utils import retrieve_coords_ra_dec
            hi.coords, hi.ra, hi.dec = retrieve_coords_ra_dec(hi.object)

        # TODO: maybe subclass SkyCoords to calculate this?
        def is_close(cooA, cooB, threshold=1e-3):
            return np.less([(cooA.ra - cooB.ra).value,
                            (cooA.dec - cooB.dec).value], threshold).all()

        for cube in args.sci:  # TODO: select instead of loop
            if cube.has_coords and hi.coords and not is_close(cube.coords, hi.coords):
                fmt = dict(style='hmsdms', precision=2, sep=' ', pad=1)
                warn('Supplied coordinates {} will supersede header coordinates {} in {}'
                     ''.format(hi.coords.to_string(**fmt),
                               cube.coords.to_string(**fmt),
                               cube.filename()))
                cube.coords = hi.coords

        if not hi.date:
            # hi.date = args.sci[0].date  # [c.date for c in args.sci]
            warn('Dates will be assumed from file creation dates.')

        # if not hi.filter:
        #     warn('Filter assumed as Empty')
        #     hi.filter = 'Empty'

        # if hi.epoch:
        #     iocheck(hi.epoch, validity.epoch, 1)
        # else:
        #     warn('Assuming epoch J2000')
        #     hi.epoch = 2000

        # if not hi.obs:
        #     note('Assuming location is SAAO Sutherland observatory.')
        #     hi.obs = 'SAAO'

        # if not hi.tel:
        #     note('Assuming telescope is SAAO 1.9m\n')  # FIXME: Don't have to assume for new data
        #     hi.tel = '1.9m'

    elif args.timing or args.split:
        # Need target coordinates for Barycentrization! Check the headers
        for cube in args.sci:  # TODO: select instead of loop
            if cube.coords is None:
                warn('Object coordinates not found in header for {}!\n'
                     'Barycentrization cannot be done without knowing target '
                     'coordinates!'.format(cube.filename()))

        # iocheck( hi.date, validity.DATE, 1 )
    # else:
    #     warn( 'Headers will not be updated!' )

    # ===========================================================================
    # if args.timing and not hi.coords:
    #     # Target coordinates not provided / inferred from
    #     warn('Barycentrization cannot be done without knowing target coordinates!')

    if args.names:
        shocFlatFieldRun.nameFormat = names.flats
        shocBiasRun.nameFormat = names.bias
        shocSciRun.nameFormat = names.sci

    # ANIMATE

    return args, head_info, names

    (ipaddress.IPv6Interface("::1/128"), ipaddress.IPv6Interface),
    (decimal.Decimal(10), decimal.Decimal),
    (datetime.datetime.strptime('Jan 1 2021 1:55PM', '%b %d %Y %I:%M%p'), datetime.datetime),
    (datetime.datetime.strptime('Jan 1 2021 1:55PM', '%b %d %Y %I:%M%p').date(), datetime.date),
]

# these types can only be instantiated on their corresponding system
if os.name == "posix":
    types.append((pathlib.PosixPath('/tmp/foo'), pathlib.PosixPath))
if os.name == "nt":
    types.append((pathlib.WindowsPath('C:\\tmp'), pathlib.WindowsPath))

if sys.version_info[:3] >= (3, 9, 0):
    types.extend([([1, 2], list[int]),
                  ({'a': 1}, dict[str, int]),
                  ((1, 1), tuple[int, int])])

types_combinations: List = list(
    map(lambda c: list(more_itertools.flatten(c)), itertools.combinations(types, 2)))

opt_case: List = [
    {'reuse_instances_default': False},
    {'reuse_instances_default': False, 'rename_all': 'camelcase'},
    {'reuse_instances_default': False, 'rename_all': 'snakecase'},
]


def make_id_from_dict(d: Dict) -> str:
    if not d:
        return 'none'
    else:
        key = list(d)[0]
        return f'{key}-{d[key]}'

def test_single_level(self):
    """ensure list of lists is flattened only one level"""
    f = [[0, [1, 2]], [[3, 4], 5]]
    self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f)))

def test_basic_usage(self):
    """ensure list of lists is flattened one level"""
    f = [[0, 1, 2], [3, 4, 5]]
    self.assertEqual(list(range(6)), list(mi.flatten(f)))

def scrape_candidates(self, product_name, archive_directory, major_version, stdout):
    """Scrape the candidates/ directory for beta, release candidate, and final releases."""
    url_path = '/pub/%s/candidates/' % archive_directory
    stdout.write('scrape_candidates working on %s' % url_path)

    # First, let's look at /pub/PRODUCT/releases/ so we know what final
    # builds have been released
    release_path = '/pub/%s/releases/' % archive_directory
    release_path_content = self.download(release_path)

    # Get the final release version numbers, so something like "64.0b8/" -> "64.0b8"
    final_releases = [
        link['text'].rstrip('/')
        for link in self.get_links(release_path_content)
        if link['text'][0].isdigit()
    ]

    content = self.download(url_path)
    version_links = [
        link for link in self.get_links(content)
        if link['text'][0].isdigit()
    ]

    # If we've got a major_version, then we only want to scrape data for versions
    # greater than (major_version - 4) and esr builds
    if major_version:
        major_version_minus_4 = major_version - 4
        stdout.write(
            'skipping anything before %s and not esr (%s)'
            % (product_name, major_version_minus_4)
        )
        version_links = [
            link for link in version_links
            if (
                # "63.0b7-candidates/" -> 63
                int(link['text'].split('.')[0]) >= major_version_minus_4
                or 'esr' in link['text']
            )
        ]

    scrape = partial(
        self.scrape_candidate_version,
        product_name=product_name,
        final_releases=final_releases
    )

    if self.num_workers == 1:
        results = map(scrape, version_links)
    else:
        with concurrent.futures.ProcessPoolExecutor(max_workers=self.num_workers) as executor:
            results = executor.map(scrape, version_links, timeout=300)
    results = list(results)

    # Convert [(build_data, msgs), (build_data, msgs), ...] into
    # build_data and msgs
    if results:
        build_data, msgs = more_itertools.unzip(results)
    else:
        build_data, msgs = [], []

    # Print all the msgs to stdout
    for msg_group in msgs:
        for msg in msg_group:
            stdout.write('worker: %s' % msg)

    # build_data is a list of lists so we flatten that
    return list(more_itertools.flatten(build_data))

    (True, Optional[bool]),
    (None, Optional[int]),
    (None, Optional[str]),
    (None, Optional[float]),
    (None, Optional[bool]),
    (Pri(10, 'foo', 100.0, True), Pri),  # dataclass
    (Pri(10, 'foo', 100.0, True), Optional[Pri]),
    (None, Optional[Pri]),
    (pathlib.Path('/tmp/foo'), pathlib.Path),  # Extended types
    (pathlib.Path('/tmp/foo'), Optional[pathlib.Path]),
    (None, Optional[pathlib.Path]),
    (decimal.Decimal(10), decimal.Decimal),
]

types_combinations: List = list(
    map(lambda c: list(more_itertools.flatten(c)), itertools.combinations(types, 2)))


def make_id_from_dict(d: Dict) -> str:
    if not d:
        return 'none'
    else:
        key = list(d)[0]
        return f'{key}-{d[key]}'


def opt_case_ids():
    return map(make_id_from_dict, opt_case)

def reshape3(dataset):
    return list(flatten(map(lambda x: list(map(clean_knowledge, x["knowledge"])), dataset)))

def reshape2(dataset):
    return list(flatten(map(lambda x: [[x["history"], x["response"]], x["knowledge"]], dataset)))

    def __repr__(self):
        return (f'Reindeer({self.name}, fly={self.fly}, fly_duration={self.fly_duration}, '
                f'rest_duration={self.rest_duration}, dist={self.dist}, state={self.state}, '
                f'remaining_time={self.remaining_time})')


deers = []


def simultaneous_step():
    """Take a step, return winner(s)"""
    dist = defaultdict(list)
    for d in deers:
        d.step()
        dist[d.dist].append(d.name)
    winner = dist[max(dist.keys())]
    # print(f'Winners: {winner} with distance {max(dist.keys())}')
    return winner


# Initialize reindeer objects
for data in deer_data:
    deers.append(Reindeer(*data))

winner_aggregate = []
for i in range(simulation_length):
    winner_aggregate.append(simultaneous_step())

ctr = Counter(flatten(winner_aggregate))
print('Answer 2:', ctr.most_common(1))

            distance = grid[neighbor]
            old_cost = D[neighbor]
            new_cost = D[current_vertex] + distance
            if new_cost < old_cost:
                heappush(pq, (old_cost + distance, neighbor))
                D[neighbor] = new_cost
    return D


if __name__ == "__main__":
    if True:
        with open("day15.input") as fp:
            data = [[int(e) for e in list(line.strip())] for line in fp.readlines()]
        size = len(data)
        grid = list(flatten(data))

        start = time()
        D = dijkstra(grid, size, 0)
        print("Part one:", D[(size * size) - 1], "in", time() - start)

    if True:
        with open("day15.input") as fp:
            data = [[int(e) for e in list(line.strip())] for line in fp.readlines()]
        size = len(data)

        for n in range(4):
            for y in range(size):
                for x in range(size):
                    data[y].append(((data[y][x] + n) % 9) + 1)

def is_first_turn(board):
    return all('.' == c for c in mt.flatten(board))

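# A small usage sketch (not from the original source), assuming the board is a sequence
# of rows (e.g. a list of strings or list of lists of single-character cells):
# mt.flatten yields every cell, so the check passes only while every cell is still '.'.
empty_board = ["...", "...", "..."]
started_board = ["...", ".x.", "..."]
assert is_first_turn(empty_board)
assert not is_first_turn(started_board)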