def flatten_dataset(X, flat_type_mapping, schema_hints=None):
    """Flatten every record in X into a FlatterDict, optionally sorting
    array fields according to schema_hints."""
    if schema_hints is None:
        schema_hints = {}
    flat_X = []
    for x in X:
        flat = flatdict.FlatterDict(x, delimiter=DELIMITER)
        c_x = copy.deepcopy(x)
        for k in flat_type_mapping.keys():
            col_type = schema_hints.get(k, {}).get('col_type')
            if col_type not in [None, COL_HINT.POSITIONAL]:
                continue
            v = flat.get(k)
            if v is not None:
                sort = schema_hints.get(k, {}).get('sort', False)
                if sort:
                    type_ = flat_type_mapping[k]
                    if type_ == 'array':
                        item_types = flat_type_mapping.get(k + '.items')
                        a = get_by_path(c_x, k.split('.'))
                        if isinstance(item_types, list):
                            try:
                                a = sorted(a)
                            except Exception:
                                print('# Warning: mixed-type array with types: {}'
                                      .format(', '.join(item_types)))
                                print('# Warning: no comparison operator provided. '
                                      'Trying to assess the proper cast...')
                                # Try each candidate type until the whole array
                                # can be cast, at which point sorting is possible.
                                for t in type_priority:
                                    try:
                                        a = list(map(type_priority[t], a))
                                        print("# Casting '{}' to {}".format(k, t))
                                        break
                                    except Exception:
                                        log.error("Could not cast '{}' to {}"
                                                  .format(k, t))
                                else:
                                    print('# Error: Could not find any way to sort {}'
                                          .format(k))
                                    raise Exception(
                                        'Could not find any way to sort {}'.format(k))
                        set_by_path(c_x, k.split('.'), sorted(a))
        flat = flatdict.FlatterDict(c_x, delimiter=DELIMITER)
        flat_X.append(flat)
    return flat_X
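# Hedged usage sketch for flatten_dataset. DELIMITER (assumed to be '.'),
# COL_HINT, type_priority, get_by_path, set_by_path and log are module-level
# names from the original project and are assumed to be in scope; the data
# below is illustrative only.
X = [{'tags': ['b', 'a']}]
types = {'tags': 'array', 'tags.items': ['string']}
hints = {'tags': {'sort': True}}
flat_rows = flatten_dataset(X, types, schema_hints=hints)
print(dict(flat_rows[0]))  # e.g. {'tags.0': 'a', 'tags.1': 'b'}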
def create_game(width, height):
    import flatdict
    import random

    player = '😎'
    grass = '🌱'
    data = flatdict.FlatterDict(
        {
            "boards": {
                "forrest": {
                    "actions": {
                        "😎": {
                            "🌱": {
                                "name": "stand",
                                "kwargs": {}
                            }
                        }
                    },
                    "grid": {
                        f'{y}': {f'{x}': [grass] for x in range(width)}
                        for y in range(height)
                    }
                }
            }
        },
        delimiter=".",
    )
    x, y = random.randint(0, width - 1), random.randint(0, height - 1)
    data[f"boards.forrest.grid.{y}.{x}"]["1"] = player
    return data, x, y
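# Usage sketch for create_game (assumes the definition above and the flatdict
# package). Grid cells are flattened to keys of the form
# "boards.forrest.grid.<y>.<x>.<i>".
board, px, py = create_game(4, 3)
print(px, py)                                     # player spawn coordinates
print(board[f"boards.forrest.grid.{py}.{px}.0"])  # -> '🌱' (grass under the player)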
def _get_flattened_dictionary_from_ros_msg(msg):
    """
    Return a flattened python dict from a ROS message

    :param msg: ROS msg instance
    :return: Flattened dict
    """
    return flatdict.FlatterDict(convert_ros_message_to_dictionary(msg),
                                delimiter="/")
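# Hedged usage sketch: requires a ROS environment. Pose is a standard
# geometry_msgs message, and convert_ros_message_to_dictionary is assumed to
# come from the rospy_message_converter package.
from geometry_msgs.msg import Pose

flat = _get_flattened_dictionary_from_ros_msg(Pose())
print(dict(flat))  # e.g. {'position/x': 0.0, ..., 'orientation/w': 0.0}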
def dict_to_table(dct, options=None, human_readable=True):
    # Avoid a mutable default argument for options.
    if options is None:
        options = {}
    flat = flatdict.FlatterDict(dct)
    columnOrder = options.get("columnOrder", [])
    table = "<table>"
    remainingColumns = set(v for v in flat.keys())
    newColumns = []
    for columnOrderItem in columnOrder:
        columnOrderItem = (columnOrderItem.replace("]", ":")
                           .replace("[", ":")
                           .replace(".", ":"))
        possibleColumns = [
            v for v in remainingColumns
            if v == columnOrderItem or v.startswith(columnOrderItem + ":")
        ]
        if len(possibleColumns) > 0:
            newColumns += sorted(possibleColumns)
            remainingColumns -= set(possibleColumns)
    if len(newColumns) == 0:
        newColumns = sorted(remainingColumns)
    for key in newColumns:
        value = flat[key]
        if human_readable:
            key = human_readable_key(key)
        table += "<tr><th>{}</th><td>{}</td></tr>".format(key, value)
    table += "</table>"
    return table
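# Usage sketch for dict_to_table. Keys are flattened with flatdict's default
# ':' delimiter, so columnOrder entries such as "user.name" are normalised to
# "user:name"; human_readable_key() from the original module is skipped here.
html = dict_to_table(
    {"user": {"name": "Ada", "tags": ["admin"]}},
    options={"columnOrder": ["user.name"]},
    human_readable=False,
)
print(html)  # -> '<table><tr><th>user:name</th><td>Ada</td></tr></table>'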
def fill_string_from_template(response, templateText):
    flat = flatdict.FlatterDict(response.value)
    # Iterate over a snapshot of the keys, since the loop mutates the mapping.
    for i in list(flat.keys()):
        flat[human_readable_key(i)] = flat.pop(i)
    kwargs = dict(serialize_model(response), response=flat)
    if kwargs.get("modify_link", None):
        kwargs["view_link"] = kwargs["modify_link"] + "&mode=view"
    msgBody = env.from_string(templateText).render(**kwargs)
    return msgBody
def get_liked_tracks_df(spotify):
    total_pages_saved_songs = get_saved_track_page_count(spotify)
    liked_tracks = list(
        chain.from_iterable([
            get_saved_tracks(spotify, page_num)
            for page_num in tqdm(list(range(total_pages_saved_songs)))
        ]))
    flattened_liked_tracks = [
        dict(flatdict.FlatterDict(track)) for track in liked_tracks
    ]
    full_liked_tracks_df = pd.DataFrame(flattened_liked_tracks)
    track_col_renames = {
        "track:album:album_type": "album_type",
        "track:album:artists:0:external_urls:spotify": "album_artist_spurl",
        "track:album:artists:0:id": "album_artist_spid",
        "track:album:artists:0:name": "album_artist_name",
        "track:album:artists:0:type": "album_artist_type",
        "track:album:external_urls:spotify": "album_spurl",
        "track:album:id": "album_spid",
        "track:album:images:0:url": "album_img_url",
        "track:album:name": "album_name",
        "track:album:release_date": "album_release_date",
        "track:album:total_tracks": "album_tracks_count",
        "track:album:type": "album_track_type",
        "track:artists:0:external_urls:spotify": "artist_spurl",
        "track:artists:0:id": "artist_spid",
        "track:artists:0:name": "artist_name",
        "track:artists:0:type": "artist_type",
        "track:duration_ms": "track_duration_ms",
        "track:explicit": "track_explicit",
        "track:external_ids:isrc": "track_isrc",
        "track:external_urls:spotify": "track_spurl",
        "track:id": "track_spid",
        "track:is_local": "track_is_local",
        "track:name": "track_name",
        "track:popularity": "track_popularity",
        "track:preview_url": "track_preview_url",
        "track:track_number": "track_number",
        "track:type": "track_type",
    }
    des_tracks_cols = ["added_at"] + list(track_col_renames.values())
    liked_tracks_df = full_liked_tracks_df.rename(track_col_renames,
                                                  axis=1)[des_tracks_cols]
    liked_tracks_df["interaction_style"] = "Liked Songs"
    liked_tracks_df["time_pulled"] = datetime.datetime.now(
        datetime.timezone.utc).isoformat()
    return liked_tracks_df
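# Hedged usage sketch: assumes an authenticated spotipy client with the
# user-library-read scope (credentials taken from the usual SPOTIPY_*
# environment variables) plus the helper functions referenced above.
import spotipy
from spotipy.oauth2 import SpotifyOAuth

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope="user-library-read"))
liked_df = get_liked_tracks_df(sp)
print(liked_df[["track_name", "artist_name", "added_at"]].head())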
def get_flat_domain_mapping(X, flat_type_mapping):
    flat_domain_mapping = {}
    for x in X:
        flat = flatdict.FlatterDict(x, delimiter='.')
        for k in flat_type_mapping.keys():
            v = flat.get(k)
            if v is not None:
                if k not in flat_domain_mapping:
                    flat_domain_mapping[k] = set()
                type_ = flat_type_mapping[k]
                try:
                    if type_ == 'array':
                        flat_domain_mapping[k].update(
                            get_by_path(x, k.split('.')))
                    else:
                        flat_domain_mapping[k].add(
                            get_by_path(x, k.split('.')))
                except Exception:
                    # Drop the key if nothing could be collected for it
                    # (e.g. unhashable or missing values).
                    if not flat_domain_mapping[k]:
                        del flat_domain_mapping[k]
    for k in flat_domain_mapping:
        flat_domain_mapping[k] = list(flat_domain_mapping[k])
    return flat_domain_mapping
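# Usage sketch for get_flat_domain_mapping. get_by_path is a helper from the
# original module that resolves a nested value by key path; the reduce-based
# stand-in below is a hypothetical minimal version for illustration only.
from functools import reduce

def get_by_path(obj, path):  # hypothetical stand-in
    return reduce(lambda d, key: d[key], path, obj)

X = [{'user': {'tags': ['a', 'b']}}, {'user': {'tags': ['b', 'c']}}]
print(get_flat_domain_mapping(X, {'user.tags': 'array'}))
# e.g. {'user.tags': ['a', 'b', 'c']} (set order is not guaranteed)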
def cleanEntries(entries, language, environments):
    '''
    Clean up workflow and environment info
    Flattening the dictionary as well
    '''
    entriesArr = []
    for entry in entries:
        if (language != entry['locale']) and not entry['publish_details']:
            continue  # We don't need unpublished and unlocalized items
        envArr = []
        try:
            for environment in entry['publish_details']:
                envArr.append((environments[environment['environment']],
                               environment['locale']))
        except KeyError:
            config.logging.warning(
                'Information about environment(s) missing. '
                'Might be missing user permissions.')
        del entry['publish_details']
        workflow = ''
        if '_workflow' in entry:
            try:
                workflow = entry['_workflow']['name']
                del entry['_workflow']
            except KeyError:
                workflow = 'Not available'
                config.logging.warning(
                    'Information about workflow stage missing. '
                    'Might be missing user permissions.')
        entry = flatdict.FlatterDict(entry)
        entry.set_delimiter('.')
        entry = dict(entry)
        entry['publish_details'] = envArr
        entry['_workflow'] = workflow
        entriesArr.append(entry)
    return entriesArr
def error_list_exception_handler(exc, context, delimiter='/'):
    """
    Enhanced version of the default DRF exception handler that consolidates
    all of the error detail dicts into a list, and nests that list under a
    top level 'errors' key. For example:

    ```
    {
        'errors': [
            {
                'code': 'required',
                'message': 'This field is required.',
                'source': '/name'
            },
            {
                'code': 'required',
                'message': 'This field is required.',
                'source': '/value'
            },
        ]
    }
    ```
    """
    def update_key_for_renderer(key, view, request):
        renderer, media_type = view.perform_content_negotiation(request,
                                                                 force=True)
        if type(renderer).__name__ == 'CamelCaseJSONRenderer':
            try:
                from djangorestframework_camel_case.util import camelize
                return list(camelize({key: None}).keys())[0]
            except ImportError:
                warnings.warn(
                    'djangorestframework-camel-case is not installed, '
                    'source keys may not render properly')
        return key

    # convert Django 404s and 403s into the DRF equivalents, this is needed so
    # we can get the full details of the exception
    if isinstance(exc, Http404):
        exc = exceptions.NotFound()
    elif isinstance(exc, PermissionDenied):
        exc = exceptions.PermissionDenied()

    # process the exception by the default exception handler to get the
    # response that we need to edit; if that handler can't process it, then
    # return None
    resp = views.exception_handler(exc, context)
    if resp is None:
        return

    details = exc.get_full_details()

    if (not isinstance(exc, exceptions.ValidationError)
            or isinstance(details, list)):
        # case 1) the exception is not a validation error, or the validation
        # error is top level
        if not isinstance(details, list):
            details = [details]
        resp.data = {'errors': details}
        return resp

    # case 2) the validation errors are nested underneath field name keys

    def fix_int_keys(obj):
        if not isinstance(obj, dict):
            return obj
        return {str(k): fix_int_keys(v) for k, v in obj.items()}

    # some keys may be ints and not strings, run this function to convert all
    # keys to strings
    details = fix_int_keys(details)
    flattened = flatdict.FlatterDict(details, delimiter)

    fields = {}
    # the error data can be nested into an arbitrary number of levels because
    # of nested serializers, so first build up a dict of all source fields
    for key, value in flattened.items():
        # if value is empty, it's just a placeholder, so ignore
        if value:
            # use rsplit to build from the back, so the last 2 items are
            # guaranteed to be the list index and the error key (code, message,
            # etc). That leaves the entire first item of the tuple as a
            # pointer to the source field
            field, idx, attr = key.rsplit(delimiter, 2)
            if field not in fields:
                fields[field] = {}
            if idx not in fields[field]:
                fields[field][idx] = {}
            fields[field][idx].update({attr: value})

    errors = []
    for field, data in sorted(fields.items()):
        # with the dict of source fields to data errors, ungroup the index
        # keys and add the error to the list
        if field == settings.api_settings.NON_FIELD_ERRORS_KEY:
            # TODO: should resetting the field be parameterized
            field = ''
        if 'view' in context and 'request' in context:
            field = update_key_for_renderer(field, context['view'],
                                            context['request'])
        # prefix the source pointer once, before iterating the index keys
        if field:
            field = '{}{}'.format(delimiter, field)
        for idx, err in data.items():
            err['source'] = field
            errors.append(err)

    resp.data = {'errors': errors}
    return resp
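# Wiring sketch: DRF discovers a custom handler through the EXCEPTION_HANDLER
# setting; the dotted path below is an assumption about where this function
# lives in your project.
REST_FRAMEWORK = {
    'EXCEPTION_HANDLER': 'myproject.exceptions.error_list_exception_handler',
}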
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-dir', type=str, required=True)
    parser.add_argument('--name', type=str)
    parser.add_argument('--load-params', type=str)
    parser.add_argument('--load-optim', type=str)
    args = parser.parse_args()

    cfg_path = os.path.join(args.model_dir, 'config.yaml')
    cfg = Configuration.from_yaml_file(cfg_path)

    global neptune
    if neptune:
        try:
            neptune.init()
            neptune.create_experiment(
                args.name or args.model_dir,
                upload_source_files=[],
                params=dict(flatdict.FlatterDict(cfg.get(), delimiter='.')))
        except neptune.exceptions.NeptuneException:
            neptune = None
            traceback.print_exc()

    seed = cfg.get('seed', 0)
    np.random.seed(seed)
    torch.random.manual_seed(seed)

    representation, start_id, end_id = cfg.configure(make_representation)
    print('Vocab size:', len(representation.vocab))

    def encode(music: muspy.Music):
        encoded = representation.encode(music)
        encoded = np.concatenate([[start_id], encoded])
        return encoded

    data_train = muspy.MusicDataset(cfg.get('train_data_path'))
    data_train = cfg['data_augmentation'].configure(AugmentedDataset,
                                                    dataset=data_train,
                                                    seed=seed)
    data_train_pt = data_train.to_pytorch_dataset(factory=encode)

    model = cfg['model'].configure(
        MusicPerformer,
        n_token=len(representation.vocab),
    ).to(DEVICE)

    train_loader = cfg['data_loader'].configure(
        DataLoader,
        dataset=data_train_pt,
        collate_fn=functools.partial(collate_padded,
                                     pad_value=end_id,
                                     max_len=model.max_len),
        batch_size=1,
        shuffle=True,
        num_workers=24)

    val_loaders = {}
    if cfg['val_data_paths']:
        val_loaders = {
            name: cfg['val_data_loader'].configure(
                DataLoader,
                dataset=muspy.MusicDataset(path).to_pytorch_dataset(
                    factory=encode),
                collate_fn=functools.partial(collate_padded,
                                             pad_value=end_id,
                                             max_len=model.max_len),
                batch_size=1,
                shuffle=False,
                num_workers=24)
            for name, path in cfg.get('val_data_paths').items()
        }

    cfg['training'].configure(train,
                              model=model,
                              ckpt_dir=args.model_dir,
                              pretrained_param_path=args.load_params,
                              optimizer_path=args.load_optim,
                              train_dloader=train_loader,
                              val_dloaders=val_loaders,
                              pad_index=end_id)

    cfg.get_unused_keys(warn=True)