def flatten_dataset(X, flat_type_mapping, schema_hints=None):
    if schema_hints is None:
        schema_hints = {}
    flat_X = []
    for x in X:
        flat = flatdict.FlatterDict(x, delimiter=DELIMITER)
        c_x = copy.deepcopy(x)
        for k in flat_type_mapping.keys():
            col_type = schema_hints.get(k, {}).get('col_type')
            if col_type not in [None, COL_HINT.POSITIONAL]:
                continue
            v = flat.get(k)
            if v is not None:
                sort = schema_hints.get(k, {}).get('sort', False)
                if sort:
                    type_ = flat_type_mapping[k]
                    if type_ == 'array':
                        item_types = flat_type_mapping.get(k + '.items')
                        a = get_by_path(c_x, k.split('.'))
                        if isinstance(item_types, list):
                            try:
                                a = sorted(a)
                            except TypeError:
                                print('# Warning: mixed-type array with types: {}'
                                      .format(', '.join(item_types)))
                                print('# Warning: no comparison operator provided.'
                                      ' Trying to assess the proper cast...')
                                for t in type_priority:
                                    try:
                                        a = list(map(type_priority[t], a))
                                        print("# Casting '{}' to {}".format(k, t))
                                        break
                                    except (TypeError, ValueError):
                                        log.error("Could not cast '{}' to {}"
                                                  .format(k, t))
                                else:
                                    raise Exception(
                                        'Could not find any way to sort {}'
                                        .format(k))
                            set_by_path(c_x, k.split('.'), sorted(a))
                flat = flatdict.FlatterDict(c_x, delimiter=DELIMITER)
        flat_X.append(flat)
    return flat_X
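A minimal sketch of the flattening step flatten_dataset builds on, assuming DELIMITER is '.' (the helpers DELIMITER, COL_HINT, get_by_path, set_by_path, type_priority, and log are defined elsewhere in the source module). FlatterDict collapses nested dicts and lists into single-level delimited keys:

import flatdict

nested = {'user': {'name': 'Ada', 'tags': ['b', 'a']}}
flat = flatdict.FlatterDict(nested, delimiter='.')
print(dict(flat))
# {'user.name': 'Ada', 'user.tags.0': 'b', 'user.tags.1': 'a'}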
Example #2
def create_game(width, height):
    import flatdict
    import random
    player = '😎'
    grass = '🌱'

    data = flatdict.FlatterDict(
        {
            "boards": {
                "forrest": {
                    "actions": {
                        "😎": {
                            "🌱": {
                                "name": "stand",
                                "kwargs": {}
                            }
                        }
                    },
                    "grid": {
                        f'{y}': {f'{x}': [grass]
                                 for x in range(width)}
                        for y in range(height)
                    }
                }
            }
        },
        delimiter=".",
    )
    x, y = random.randint(0, width - 1), random.randint(0, height - 1)

    data[f"boards.forrest.grid.{y}.{x}"]["1"] = player
    return data, x, y
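A hypothetical usage sketch: build a 4x3 board and read the player tile back through a dotted key (the player is stored at list index 1, after the grass tile at index 0):

board, px, py = create_game(4, 3)
print(board[f"boards.forrest.grid.{py}.{px}.1"])  # prints the player emoji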
Example #3
def _get_flattened_dictionary_from_ros_msg(msg):
    """
    Return a flattened python dict from a ROS message
    :param msg: ROS msg instance
    :return: Flattened dict
    """
    return flatdict.FlatterDict(convert_ros_message_to_dictionary(msg), delimiter="/")
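A hedged usage sketch, assuming a ROS environment where std_msgs is available and the module already imports convert_ros_message_to_dictionary from rospy_message_converter; nested message fields are joined with '/':

from std_msgs.msg import Header

header = Header(seq=1, frame_id='base_link')
print(dict(_get_flattened_dictionary_from_ros_msg(header)))
# e.g. {'seq': 1, 'stamp/secs': 0, 'stamp/nsecs': 0, 'frame_id': 'base_link'}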
Example #4
def dict_to_table(dct, options=None, human_readable=True):
    # avoid the mutable default-argument pitfall
    if options is None:
        options = {}
    flat = flatdict.FlatterDict(dct)
    columnOrder = options.get("columnOrder", [])
    table = "<table>"
    remainingColumns = set(flat.keys())
    newColumns = []
    for columnOrderItem in columnOrder:
        columnOrderItem = (columnOrderItem.replace("]", ":").replace(
            "[", ":").replace(".", ":"))
        possibleColumns = [
            v for v in remainingColumns
            if v == columnOrderItem or v.startswith(columnOrderItem + ":")
        ]
        if len(possibleColumns) > 0:
            newColumns += sorted(possibleColumns)
            remainingColumns -= set(possibleColumns)
    if len(newColumns) == 0:
        newColumns = sorted(remainingColumns)
    for key in newColumns:
        value = flat[key]
        if human_readable:
            key = human_readable_key(key)
        table += "<tr><th>{}</th><td>{}</td></tr>".format(key, value)
    table += "</table>"
    return table
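A minimal usage sketch. Entries in columnOrder may use dots or brackets; both are normalized to flatdict's default ':' delimiter before matching. human_readable=False sidesteps the human_readable_key helper, which is defined elsewhere:

html = dict_to_table(
    {'name': 'Ada', 'tags': ['x', 'y']},
    options={'columnOrder': ['tags']},
    human_readable=False,
)
print(html)
# <table><tr><th>tags:0</th><td>x</td></tr><tr><th>tags:1</th><td>y</td></tr></table>

Note that once columnOrder matches at least one key, all unmatched keys ('name' here) are dropped from the table.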
Example #5
def fill_string_from_template(response, templateText):
    flat = flatdict.FlatterDict(response.value)
    # take a snapshot of the keys up front: the loop renames entries in place
    for i in list(flat):
        flat[human_readable_key(i)] = flat.pop(i)
    kwargs = dict(serialize_model(response), response=flat)
    if kwargs.get("modify_link", None):
        kwargs["view_link"] = kwargs["modify_link"] + "&mode=view"
    msgBody = env.from_string(templateText).render(**kwargs)
    return msgBody
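The key-renaming step above, isolated, with a trivial hypothetical stand-in for the human_readable_key helper (the real one is defined elsewhere):

import flatdict

def human_readable_key(k):  # hypothetical stand-in
    return k.replace(':', ' ')

flat = flatdict.FlatterDict({'user': {'first_name': 'Ada'}})
for i in list(flat):
    flat[human_readable_key(i)] = flat.pop(i)
print(dict(flat))  # {'user first_name': 'Ada'}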
Example #6
def get_liked_tracks_df(spotify):
    total_pages_saved_songs = get_saved_track_page_count(spotify)
    liked_tracks = list(
        chain.from_iterable([
            get_saved_tracks(spotify, page_num)
            for page_num in tqdm(list(range(total_pages_saved_songs)))
        ]))
    flattened_liked_tracks = [
        dict(flatdict.FlatterDict(track)) for track in liked_tracks
    ]
    full_liked_tracks_df = pd.DataFrame(flattened_liked_tracks)
    track_col_renames = {
        "track:album:album_type": "album_type",
        "track:album:artists:0:external_urls:spotify": "album_artist_spurl",
        "track:album:artists:0:id": "album_artist_spid",
        "track:album:artists:0:name": "album_artist_name",
        "track:album:artists:0:type": "album_artist_type",
        "track:album:external_urls:spotify": "album_spurl",
        "track:album:id": "album_spid",
        "track:album:images:0:url": "album_img_url",
        "track:album:name": "album_name",
        "track:album:release_date": "album_release_date",
        "track:album:total_tracks": "album_tracks_count",
        "track:album:type": "album_track_type",
        "track:artists:0:external_urls:spotify": "artist_spurl",
        "track:artists:0:id": "artist_spid",
        "track:artists:0:name": "artist_name",
        "track:artists:0:type": "artist_type",
        "track:duration_ms": "track_duration_ms",
        "track:explicit": "track_explicit",
        "track:external_ids:isrc": "track_isrc",
        "track:external_urls:spotify": "track_spurl",
        "track:id": "track_spid",
        "track:is_local": "track_is_local",
        "track:name": "track_name",
        "track:popularity": "track_popularity",
        "track:preview_url": "track_preview_url",
        "track:track_number": "track_number",
        "track:type": "track_type",
    }
    des_tracks_cols = ["added_at"] + list(track_col_renames.values())
    liked_tracks_df = full_liked_tracks_df.rename(track_col_renames,
                                                  axis=1)[des_tracks_cols]
    liked_tracks_df["interaction_style"] = "Liked Songs"
    liked_tracks_df["time_pulled"] = datetime.datetime.now(
        datetime.timezone.utc).isoformat()
    return liked_tracks_df
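An illustration of the flattening that yields the ':'-delimited column names in the rename map above (fake, heavily abbreviated track payload):

import flatdict

fake_track = {
    'added_at': '2024-01-01T00:00:00Z',
    'track': {'id': 'abc123',
              'album': {'name': 'Debut', 'artists': [{'name': 'Ada'}]}},
}
print(dict(flatdict.FlatterDict(fake_track)))
# {'added_at': '2024-01-01T00:00:00Z', 'track:id': 'abc123',
#  'track:album:name': 'Debut', 'track:album:artists:0:name': 'Ada'}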
Example #7
def get_flat_domain_mapping(X, flat_type_mapping):
    flat_domain_mapping = {}
    for x in X:
        flat = flatdict.FlatterDict(x, delimiter='.')
        for k in flat_type_mapping.keys():
            v = flat.get(k)
            if v is not None:
                if k not in flat_domain_mapping:
                    flat_domain_mapping[k] = set()
                type_ = flat_type_mapping[k]
                try:
                    if type_ == 'array':
                        flat_domain_mapping[k].update(
                            get_by_path(x, k.split('.')))
                    else:
                        flat_domain_mapping[k].add(get_by_path(
                            x, k.split('.')))
                except (KeyError, IndexError, TypeError):
                    if not flat_domain_mapping[k]:
                        del flat_domain_mapping[k]
    for k in flat_domain_mapping:
        flat_domain_mapping[k] = list(flat_domain_mapping[k])
    return flat_domain_mapping
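The get_by_path and set_by_path helpers used in Examples #1 and #7 are not shown in this listing; below are hypothetical stand-ins plus a usage sketch:

from functools import reduce

def get_by_path(obj, path):  # hypothetical stand-in: walk nested containers
    return reduce(lambda acc, key: acc[key], path, obj)

def set_by_path(obj, path, value):  # hypothetical stand-in
    get_by_path(obj, path[:-1])[path[-1]] = value

X = [{'a': 1, 'tags': ['x']}, {'a': 2, 'tags': ['y']}]
print(get_flat_domain_mapping(X, {'a': 'int', 'tags': 'array'}))
# e.g. {'a': [1, 2], 'tags': ['x', 'y']} (set ordering may vary)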
Example #8
def cleanEntries(entries, language, environments):
    '''
    Clean up workflow and environment info
    Flattening the dictionary as well
    '''
    entriesArr = []
    for entry in entries:
        if (language != entry['locale']) and not entry['publish_details']:
            continue  # We don't need unpublished and unlocalized items
        envArr = []
        try:
            for environment in entry['publish_details']:
                envArr.append((environments[environment['environment']],
                               environment['locale']))
        except KeyError:
            config.logging.warning(
                'Information about environment(s) missing. Might be missing user permissions.'
            )
        del entry['publish_details']
        workflow = ''
        if '_workflow' in entry:
            try:
                workflow = entry['_workflow']['name']
                del entry['_workflow']
            except KeyError:
                workflow = 'Not available'
                config.logging.warning(
                    'Information about workflow stage missing. Might be missing user permissions.'
                )
        entry = flatdict.FlatterDict(entry, delimiter='.')
        entry = dict(entry)
        entry['publish_details'] = envArr
        entry['_workflow'] = workflow
        entriesArr.append(entry)
    return entriesArr
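A hypothetical usage sketch with a minimal fake entry and environment map (the config.logging warnings are only emitted on missing data, so they are not exercised here):

environments = {'env_uid_1': 'production'}
entries = [{
    'locale': 'en-us',
    'title': 'Home',
    'publish_details': [{'environment': 'env_uid_1', 'locale': 'en-us'}],
    '_workflow': {'name': 'Draft'},
}]
print(cleanEntries(entries, 'en-us', environments))
# [{'locale': 'en-us', 'title': 'Home',
#   'publish_details': [('production', 'en-us')], '_workflow': 'Draft'}]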
Example #9
def error_list_exception_handler(exc, context, delimiter='/'):
    """
    Enhanced version of the default DRF exception handler that consolidates all
    of the error detail dicts into a list, and nests that list under a top
    level 'errors' key.  For example:

    ```
    {
        'errors': [
            {
                'code': 'required',
                'message': 'This field is required.',
                'source': '/name'
            },
            {
                'code': 'required',
                'message': 'This field is required.',
                'source': '/value'
            },
        ]
    }
    ```
    """
    def update_key_for_renderer(key, view, request):
        renderer, media_type = view.perform_content_negotiation(
                request, force=True)
        if type(renderer).__name__ == 'CamelCaseJSONRenderer':
            try:
                from djangorestframework_camel_case.util import camelize
                return list(camelize({key: None}).keys())[0]
            except ImportError:
                warnings.warn(
                    'djangorestframework-camel-case is not installed, '
                    'source keys may not render properly'
                )
        return key

    # convert Django 404s and 403s into the DRF equivalents, this is needed so
    # we can get the full details of the exception
    if isinstance(exc, Http404):
        exc = exceptions.NotFound()
    elif isinstance(exc, PermissionDenied):
        exc = exceptions.PermissionDenied()

    # process the exception by the default exception handler to get the
    # response that we need to edit, if that handler can't process it, then
    # return None
    resp = views.exception_handler(exc, context)
    if resp is None:
        return

    details = exc.get_full_details()

    if not isinstance(exc, exceptions.ValidationError) or isinstance(details, list):  # noqa: E501
        # case 1) the exception is not a validation error, or the validation
        # error details are already a top-level list
        if not isinstance(details, list):
            details = [details]
        resp.data = {'errors': details}
        return resp

    # case 2) the validation errors are nested underneath field
    # name keys
    def fix_int_keys(obj):
        if not isinstance(obj, dict):
            return obj
        return {str(k): fix_int_keys(v) for k, v in obj.items()}
    # some keys may be ints rather than strings; run this function to convert
    # all keys to strings
    details = fix_int_keys(details)
    flattened = flatdict.FlatterDict(details, delimiter)
    fields = {}
    # the error data can be nested into an arbitrary number of levels because
    # of nested serializers, so first build up a dict of all source fields
    for key, value in flattened.items():
        # if value is empty, its just a placeholder, so ignore
        if value:
            # use rsplit to build from the back, so the last 2 items are
            # guaranteed to be the list index and the error key (code, message,
            # etc).  That leaves the entire first item of the tuple as a
            # pointer to the source field
            field, idx, attr = key.rsplit(delimiter, 2)
            if field not in fields:
                fields[field] = {}
            if idx not in fields[field]:
                fields[field][idx] = {}
            fields[field][idx].update({attr: value})

    errors = []
    for field, data in sorted(fields.items()):
        # with the dict of source fields to data errors, ungroup the index keys
        # and add the error to the list
        if field == settings.api_settings.NON_FIELD_ERRORS_KEY:
            # TODO: should resetting the field be parameterized
            field = ''
        if 'view' in context and 'request' in context:
            field = update_key_for_renderer(
                    field, context['view'], context['request'])
        # prefix the delimiter once, outside the loop, so fields with several
        # errors don't accumulate a leading delimiter per error
        if field:
            field = '{}{}'.format(delimiter, field)
        for err in data.values():
            err['source'] = field
            errors.append(err)
    resp.data = {'errors': errors}
    return resp
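The key-grouping idea from case 2, isolated: flattened validation details are split from the right so the last two segments are always the list index and the error attribute (fake details payload):

import flatdict

details = {'profile': {'name': [{'code': 'required',
                                 'message': 'This field is required.'}]}}
for key, value in flatdict.FlatterDict(details, '/').items():
    print(key.rsplit('/', 2), '->', value)
# ['profile/name', '0', 'code'] -> required
# ['profile/name', '0', 'message'] -> This field is required.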
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-dir', type=str, required=True)
    parser.add_argument('--name', type=str)
    parser.add_argument('--load-params', type=str)
    parser.add_argument('--load-optim', type=str)
    args = parser.parse_args()

    cfg_path = os.path.join(args.model_dir, 'config.yaml')
    cfg = Configuration.from_yaml_file(cfg_path)

    global neptune
    if neptune:
        try:
            neptune.init()
            neptune.create_experiment(args.name or args.model_dir,
                                      upload_source_files=[],
                                      params=dict(
                                          flatdict.FlatterDict(cfg.get(),
                                                               delimiter='.')))
        except neptune.exceptions.NeptuneException:
            neptune = None
            traceback.print_exc()

    seed = cfg.get('seed', 0)
    np.random.seed(seed)
    torch.random.manual_seed(seed)

    representation, start_id, end_id = cfg.configure(make_representation)
    print('Vocab size:', len(representation.vocab))

    def encode(music: muspy.Music):
        encoded = representation.encode(music)
        encoded = np.concatenate([[start_id], encoded])
        return encoded

    data_train = muspy.MusicDataset(cfg.get('train_data_path'))
    data_train = cfg['data_augmentation'].configure(AugmentedDataset,
                                                    dataset=data_train,
                                                    seed=seed)
    data_train_pt = data_train.to_pytorch_dataset(factory=encode)

    model = cfg['model'].configure(
        MusicPerformer,
        n_token=len(representation.vocab),
    ).to(DEVICE)

    train_loader = cfg['data_loader'].configure(DataLoader,
                                                dataset=data_train_pt,
                                                collate_fn=functools.partial(
                                                    collate_padded,
                                                    pad_value=end_id,
                                                    max_len=model.max_len),
                                                batch_size=1,
                                                shuffle=True,
                                                num_workers=24)

    val_loaders = {}
    if cfg['val_data_paths']:
        val_loaders = {
            name: cfg['val_data_loader'].configure(
                DataLoader,
                dataset=muspy.MusicDataset(path).to_pytorch_dataset(
                    factory=encode),
                collate_fn=functools.partial(collate_padded,
                                             pad_value=end_id,
                                             max_len=model.max_len),
                batch_size=1,
                shuffle=False,
                num_workers=24)
            for name, path in cfg.get('val_data_paths').items()
        }

    cfg['training'].configure(train,
                              model=model,
                              ckpt_dir=args.model_dir,
                              pretrained_param_path=args.load_params,
                              optimizer_path=args.load_optim,
                              train_dloader=train_loader,
                              val_dloaders=val_loaders,
                              pad_index=end_id)

    cfg.get_unused_keys(warn=True)
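For reference, the params flattening used in the neptune.create_experiment call above, isolated (fake config dict); experiment trackers generally expect a flat string-keyed mapping:

import flatdict

fake_cfg = {'model': {'d_model': 512, 'n_layer': 12}, 'seed': 0}
print(dict(flatdict.FlatterDict(fake_cfg, delimiter='.')))
# {'model.d_model': 512, 'model.n_layer': 12, 'seed': 0}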