Example #1
0
    def _summarize(self, cv_res):
        """For each case-estimator, return best param draw from cv results."""
        summary = _dict()
        for case_est, data in cv_res.items():

            # For each case and estimator, iterate over draws to find best
            # test score
            best_data = None
            for draw_num, draw_data in data.items():

                if best_data is None:
                    best_data, best_draw = draw_data, draw_num
                    best_data['params'] = self.params[case_est][best_draw]

                if draw_data['test_score_mean'] > best_data['test_score_mean']:
                    best_data, best_draw = draw_data, draw_num
                    best_data['params'] = self.params[case_est][best_draw]

            # Assign data associated with best test score to summary dict
            # We invert the dictionary nesting here
            for metric, val in best_data.items():
                if metric not in summary:
                    summary[metric] = _dict()

                summary[metric][case_est] = val

        return summary
Example #2
0
    def _collect(self):
        """Collect output and format into dicts."""
        # Scores are returned as a list of tuples for each case, est, draw and
        # fold. We need to aggregate them up to case, est and draw level.
        scores = self._aggregate_scores()

        # To build the cv_results dictionary, we loop over the scores dict and
        # aggregate the lists created on the metrics specified.
        cv_res = self._get_results(scores)

        # Summarize best draws for each case-est draw
        summary = self._summarize(cv_res)

        # Finally, we sort summary in order of best performance
        rank = sorted(summary['test_score_mean'],
                      key=itemgetter(1),
                      reverse=True)

        pretty_summary = _dict()
        for metric, data in summary.items():
            pretty_summary[metric] = _dict()

            for case_est in rank:
                pretty_summary[metric][case_est] = data[case_est]

        self.cv_results = cv_res
        self.summary = pretty_summary
Example #3
0
 def __init__(self, random_access_proxy, key_function,
              repr, obj_repr):
     """Initialize the class.

     Builds an in-memory mapping from record key to file offset by
     consuming *random_access_proxy* (an iterable yielding
     ``(key, offset, length)`` tuples). *key_function*, when given,
     transforms the raw key before it is stored. Raises ValueError on
     duplicate keys, closing the proxy's handle first.
     """
     # Use key_function=None for default value
     self._proxy = random_access_proxy
     self._key_function = key_function
     # NOTE(review): `repr` shadows the builtin; presumably a display
     # string used for __repr__ elsewhere -- confirm against callers.
     self._repr = repr
     self._obj_repr = obj_repr
     if key_function:
         # Lazily map raw keys through the user-supplied key_function.
         offset_iter = (
             (key_function(k), o, l) for (k, o, l) in random_access_proxy)
     else:
         offset_iter = random_access_proxy
     offsets = _dict()
     for key, offset, length in offset_iter:
         # Note - we don't store the length because I want to minimise the
         # memory requirements. With the SQLite backend the length is kept
         # and is used to speed up the get_raw method (by about 3 times).
         # The length should be provided by all the current backends except
         # SFF where there is an existing Roche index we can reuse (very fast
         # but lacks the record lengths)
         # assert length or format in ["sff", "sff-trim"], \
         #       "%s at offset %i given length %r (%s format %s)" \
         #       % (key, offset, length, filename, format)
         if key in offsets:
             # Close the underlying handle before raising so the file
             # descriptor is not leaked.
             self._proxy._handle.close()
             raise ValueError("Duplicate key '%s'" % key)
         else:
             offsets[key] = offset
     self._offsets = offsets
Example #4
0
    def __init__(self):
        '''
            Initialize the class, should be called once only.

            Bails out early with a message box if the installed
            PythonScript plugin is older than 1.5.4.0; otherwise sets up
            the indicator on both editor views and registers the
            Scintilla/Notepad++ callbacks.
        '''

        def _version_tuple(version):
            # Compare release numbers numerically: plain string
            # comparison would e.g. rank '1.10.0.0' below '1.5.4.0'.
            try:
                return tuple(int(part) for part in version.split('.'))
            except ValueError:
                # Unexpected format - treat as too old so we fail loudly.
                return ()

        current_version = notepad.getPluginVersion()
        if _version_tuple(current_version) < _version_tuple('1.5.4.0'):
            notepad.messageBox(
                'It is needed to run PythonScript version 1.5.4.0 or higher',
                'Unsupported PythonScript version: {}'.format(current_version))
            return

        self.INDICATOR_ID = 0
        self.registered_lexers = _dict()

        # Set per-buffer when the active lexer has registered regexes.
        self.document_is_of_interest = False
        self.regexes = None
        self.excluded_styles = None

        # TEXTFORE + VALUEFORE lets each indicator range carry its own color.
        editor1.indicSetStyle(self.INDICATOR_ID, INDICATORSTYLE.TEXTFORE)
        editor1.indicSetFlags(self.INDICATOR_ID, INDICFLAG.VALUEFORE)
        editor2.indicSetStyle(self.INDICATOR_ID, INDICATORSTYLE.TEXTFORE)
        editor2.indicSetFlags(self.INDICATOR_ID, INDICFLAG.VALUEFORE)

        editor.callbackSync(self.on_updateui, [SCINTILLANOTIFICATION.UPDATEUI])
        editor.callbackSync(self.on_marginclick,
                            [SCINTILLANOTIFICATION.MARGINCLICK])
        notepad.callback(self.on_langchanged, [NOTIFICATION.LANGCHANGED])
        notepad.callback(self.on_bufferactivated,
                         [NOTIFICATION.BUFFERACTIVATED])
Example #5
0
 def __init__(self, random_access_proxy, key_function,
              repr, obj_repr):
     """Initialize the class."""
     # Use key_function=None for default value
     self._proxy = random_access_proxy
     self._key_function = key_function
     self._repr = repr
     self._obj_repr = obj_repr
     if key_function:
         offset_iter = (
             (key_function(k), o, l) for (k, o, l) in random_access_proxy)
     else:
         offset_iter = random_access_proxy
     offsets = _dict()
     for key, offset, length in offset_iter:
         # Note - we don't store the length because I want to minimise the
         # memory requirements. With the SQLite backend the length is kept
         # and is used to speed up the get_raw method (by about 3 times).
         # The length should be provided by all the current backends except
         # SFF where there is an existing Roche index we can reuse (very fast
         # but lacks the record lengths)
         # assert length or format in ["sff", "sff-trim"], \
         #       "%s at offset %i given length %r (%s format %s)" \
         #       % (key, offset, length, filename, format)
         if key in offsets:
             self._proxy._handle.close()
             raise ValueError("Duplicate key '%s'" % key)
         else:
             offsets[key] = offset
     self._offsets = offsets
Example #6
0
    def to_ident(self, ):
        """Build an identity list for this object from its response.

        Derives a header-based identifier (etag behind varnish, else
        content-length, else content-disposition) and combines it with
        the object's remaining attributes and the response body text.
        Raises when no identifier is found and the body is empty.
        """
        response = self.response
        state = self.__dict__.copy()
        state.pop('response', None)

        headers = response.headers
        ident = None
        # Behind a varnish cache the etag is the most reliable identity.
        if 'varnish' in headers.get('via', ''):
            ident = headers.get('etag', None)
        for fallback in ('content-length', 'content-disposition'):
            if ident is None:
                ident = headers.get(fallback, None)
        if ident is None and not response.text:
            raise Exception('HTTP header is not informative!%s' %
                            json.dumps(_dict(response.headers), indent=2))
        return [
            sorted(state.items()), ('_header_ident', ident),
            ('_text', response.text)
        ]
Example #7
0
    def _get_results(self, scores):
        """Return score metrics for each case, est and param draw level."""
        cv_res = _dict()
        for name, case_est_data in scores.items():

            if name not in cv_res:
                cv_res[name] = _dict()

            for draw_num, draw_data in case_est_data.items():

                if draw_num not in cv_res[name]:
                    cv_res[name][draw_num] = _dict()

                for key, values in draw_data.items():
                    for n, m in zip(['mean', 'std'], self.metrics):
                        cv_res[name][draw_num]['%s_%s' % (key, n)] = m(values)
        return cv_res
Example #8
0
def to_dict(qresults, key_function=lambda rec: rec.id):
    """Turn a QueryResult iterator or list into a dictionary.

     - qresults     - Iterable returning QueryResult objects.
     - key_function - Optional callback function which when given a
                      QueryResult object should return a unique key for the
                      dictionary.

    By default the QueryResult's string ID becomes the dictionary key;
    supply ``key_function`` to derive a different key. The callback never
    alters the QueryResult itself, only the lookup key. A ValueError is
    raised when two results map to the same key.

    All QueryResult objects are held in memory, so for files containing
    many queries prefer `index` or `index_db`. Insertion order is
    preserved (an OrderedDict is used on Pythons whose plain dict is
    unordered, i.e. before CPython 3.6 / Python 3.7).
    """
    result = _dict()
    for qresult in qresults:
        result_key = key_function(qresult)
        if result_key in result:
            raise ValueError("Duplicate key %r" % result_key)
        result[result_key] = qresult
    return result
Example #9
0
def to_dict(qresults, key_function=lambda rec: rec.id):
    """Turn a QueryResult iterator or list into a dictionary.

     - qresults     - Iterable returning QueryResult objects.
     - key_function - Optional callback function which when given a
                      QueryResult object should return a unique key for the
                      dictionary.

    This function enables access of QueryResult objects from a single search
    output file using its identifier.

    >>> from Bio import SearchIO
    >>> qresults = SearchIO.parse('Blast/wnts.xml', 'blast-xml')
    >>> search_dict = SearchIO.to_dict(qresults)
    >>> list(search_dict)
    ['gi|195230749:301-1383', 'gi|325053704:108-1166', ..., 'gi|53729353:216-1313']
    >>> search_dict['gi|156630997:105-1160']
    QueryResult(id='gi|156630997:105-1160', 5 hits)

    By default, the dictionary key is the QueryResult's string ID. This may be
    changed by supplying a callback function that returns the desired identifier.
    Here is an example using a function that removes the 'gi|' part in the
    beginning of the QueryResult ID.

    >>> from Bio import SearchIO
    >>> qresults = SearchIO.parse('Blast/wnts.xml', 'blast-xml')
    >>> key_func = lambda qresult: qresult.id.split('|')[1]
    >>> search_dict = SearchIO.to_dict(qresults, key_func)
    >>> list(search_dict)
    ['195230749:301-1383', '325053704:108-1166', ..., '53729353:216-1313']
    >>> search_dict['156630997:105-1160']
    QueryResult(id='gi|156630997:105-1160', 5 hits)

    Note that the callback function does not change the QueryResult's ID value.
    It only changes the key value used to retrieve the associated QueryResult.

    As this function loads all QueryResult objects into memory, it may be
    unsuitable for dealing with files containing many queries. In that case, it
    is recommended that you use either `index` or `index_db`.

    Since Python 3.7, the default dict class maintains key order, meaning
    this dictionary will reflect the order of records given to it. For
    CPython, this was already implemented in 3.6.

    As of Biopython 1.73, we explicitly use OrderedDict for CPython older
    than 3.6 (and for other Python older than 3.7) so that you can always
    assume the record order is preserved.
    """
    qdict = _dict()
    for qresult in qresults:
        key = key_function(qresult)
        if key in qdict:
            raise ValueError("Duplicate key %r" % key)
        qdict[key] = qresult
    return qdict
Example #10
0
    def _aggregate_scores(self):
        """Aggregate scores to one list per case, est and param draw level."""
        scores = _dict()
        for case, est, draw_num, train_sc, test_sc, fit_time in self.scores_:

            # Strip fold data
            if case is not None:
                name = (case.split('__')[0], est.split('__')[0])
            else:
                name = est.split('__')[0]

            if name not in scores:
                scores[name] = _dict()

            if draw_num not in scores[name]:
                scores[name][draw_num] = _dict(test_score=[],
                                               train_score=[],
                                               fit_time=[])

            scores[name][draw_num]['test_score'].append(test_sc)
            scores[name][draw_num]['train_score'].append(train_sc)
            scores[name][draw_num]['fit_time'].append(fit_time)
Example #11
0
def _get_output_file(FNAME,INDEX_FILE=INDEX_FILE):
    # Record FNAME in the shared index file, then hand the name back to
    # the caller unchanged.
    entry = _dict([
        ("TS", time.time()),
        ("OUTPUT_FILE", FNAME),
        ("RUNTIME_FILE", pymisca.header.name__lookup('__file__', level=-1)),
    ])
    print (json.dumps(entry, indent=4))
    # Serialise concurrent appends with a lock file next to the index.
    with FileLock(INDEX_FILE + '.lock'):
        with open(INDEX_FILE, "a") as handle:
            # One JSON object per line (JSON-lines format).
            json.dump(entry, handle)
            handle.write('\n')
    return FNAME
Example #12
0
class Dict(_dict):
    """ A dict in which the items can be get/set as attributes.

    Keys that collide with dict method names, or that are not valid
    identifiers, remain accessible only via item syntax.
    """

    __reserved_names__ = dir(_dict())  # Also from OrderedDict
    __pure_names__ = dir(dict())

    __slots__ = []

    def __repr__(self):
        ident_parts = []
        other_parts = []
        for key, val in self.items():
            if isidentifier(key):
                ident_parts.append("%s=%r" % (key, val))
            else:
                other_parts.append("(%r, %r)" % (key, val))
        if other_parts:
            # Non-identifier keys are rendered as an initial pair list.
            return "Dict([%s], %s)" % (
                ", ".join(other_parts),
                ", ".join(ident_parts),
            )
        return "Dict(%s)" % (", ".join(ident_parts))

    def __getattribute__(self, key):
        # Normal attribute lookup first; fall back to item lookup.
        try:
            return object.__getattribute__(self, key)
        except AttributeError:
            if key not in self:
                raise
            return self[key]

    def __setattr__(self, key, val):
        if key not in Dict.__reserved_names__:
            # if isinstance(val, dict): val = Dict(val) -> no, makes a copy!
            self[key] = val
        elif key not in Dict.__pure_names__:
            # Either let OrderedDict do its work, or disallow
            return _dict.__setattr__(self, key, val)
        else:
            raise AttributeError("Reserved name, this key can only " +
                                 "be set via ``d[%r] = X``" % key)

    def __dir__(self):
        return Dict.__reserved_names__ + [
            k for k in self.keys() if isidentifier(k)]
Example #13
0
    def register_lexer(self, lexer_name, _regexes, excluded_styles):
        '''
            Reformat the provided regexes and cache everything within
            the registered_lexers dictionary.

            Args:
                lexer_name = string, expected values as returned by notepad.getLanguageName
                                without the "udf - " if it is an user defined language
                _regexes = dict, in the form of
                                _regexes[(int, (r, g, b))] = (r'', [int])
                excluded_styles = list of integers
            Returns:
                None
        '''
        # Pack each (id, (r, g, b)) key into (id, color-value) form so the
        # indicator machinery can consume it directly.
        converted = _dict(
            ((key[0], self.rgb(*key[1]) | INDICVALUE.BIT), value)
            for key, value in _regexes.items())
        self.registered_lexers[lexer_name.lower()] = (converted, excluded_styles)
Example #14
0
def get_test_plan_test_case_info():
    # Walk the cached Purley_FPGA test-case JSON files and print a dict
    # keyed by each case's '_test_id'.
    # NOTE(review): this uses Python 2 print statements, so the module
    # targets Python 2 -- confirm before porting.
    # NOTE(review): the assembled global_info_dict is printed but never
    # returned -- confirm whether a `return` is intended given the name.
    import os
    import json
    global_info_dict = _dict()
    for dirpath, dirnames, filenames in os.walk(os.getcwd() + os.sep +
                                                'test_case_cache' + os.sep +
                                                'Subject' + os.sep +
                                                'Purley_FPGA'):
        i = 0
        for eb in filenames:
            i += 1
            print os.path.join(dirpath, eb)
            with open(os.path.join(dirpath, eb), 'r') as p:
                data = json.load(p)
                # HPQC_info_parser_tool presumably extracts the fields of
                # interest from the raw HPQC export -- defined elsewhere.
                signal_data_dict = HPQC_info_parser_tool(data)
                print signal_data_dict
                global_info_dict[
                    signal_data_dict['_test_id']] = signal_data_dict
    print global_info_dict, len(global_info_dict)
Example #15
0
def graph_from_tree(
    lst,
    g=None,
    last=None,
    i=None,
):
    '''
    Accept a list of shape [(node,[(file,[(node,[(file,)])]),]),]

    Recursively renders the node/file tree into a graphviz Digraph:
    each node becomes a plaintext table node linked from its parent,
    and its files are grouped in a blue cluster. Returns the Digraph.
    '''
    # Alias for recursive self-calls below.
    this = graph_from_tree
    if g is None:
        # Top-level call: create the graph, laid out top-to-bottom.
        g = Digraph('G', strict=True)
        g.attr(rankdir='TB')
        # g = Digraph('G', strict=0,)

    # `i` is a single-element list used as a mutable counter shared
    # across recursive calls.
    if not i:
        i = [0]
    if last is None:
        last = Path('root')
        g.node(last, label=repr(last), shape='diamond')
    out = []
    for node, node_files in (lst):
        i[0] += 1
        g.edge(last, node.prefix_named)
        g.node(node.prefix_named,
               label=node.to_table_node_label(),
               shape='plaintext')

        with g.subgraph(name='cluster_%s' % node.prefix_named) as c:
            # c.attr(label= node.dotname)
            c.attr(color='blue')
            c.node_attr['style'] = 'filled'
            nout = []
            # First pass: draw every file of this node inside the cluster.
            for file, down_nodes in node_files:
                c.node(file, label='%r' % file.basename(), style='filled')
                c.edge(node.prefix_named, file)
                # .basename())
            # Second pass: recurse into each file's child nodes.
            for file, down_nodes in node_files:
                res = this(down_nodes, g, file, i)
                nout.append((file.basename(), res))
        # NOTE(review): `out` is accumulated but never returned or read;
        # looks vestigial -- confirm before removing.
        out.append((node.prefix_named, _dict(nout)))
    return g
Example #16
0
class Dict(_dict):
    """A dict whose keys are also reachable as attributes.

    Convenient in combination with autocompletion. Behaves as much as
    possible like a normal dict and accepts any valid key; only keys
    that are not valid identifiers, or that collide with dict method
    names (such as 'items' and 'copy'), cannot be get/set as attributes.
    """

    __reserved_names__ = dir(_dict())  # Also from OrderedDict
    __pure_names__ = dir(dict())

    def __getattribute__(self, key):
        # Regular attribute lookup first; on failure fall back to items.
        try:
            return object.__getattribute__(self, key)
        except AttributeError:
            if key not in self:
                raise
            return self[key]

    def __setattr__(self, key, val):
        if key not in Dict.__reserved_names__:
            # if isinstance(val, dict): val = Dict(val) -> no, makes a copy!
            self[key] = val
        elif key not in Dict.__pure_names__:
            # Either let OrderedDict do its work, or disallow
            return _dict.__setattr__(self, key, val)
        else:
            raise AttributeError('Reserved name, this key can only ' +
                                 'be set via ``d[%r] = X``' % key)

    def __dir__(self):
        def _is_identifier(name):
            return bool(re.match(r'[a-z_]\w*$', name, re.I))

        attribute_keys = [
            k for k in self.keys()
            if isinstance(k, string_types) and _is_identifier(k)
        ]
        return Dict.__reserved_names__ + attribute_keys
Example #17
0
def to_dict(sequences, key_function=None):
    """Turn a sequence iterator or list into a dictionary.

    Arguments:
     - sequences  - An iterator that returns SeqRecord objects,
       or simply a list of SeqRecord objects.
     - key_function - Optional callback function which when given a
       SeqRecord should return a unique key for the dictionary.

    e.g. key_function = lambda rec : rec.name
    or,  key_function = lambda rec : rec.description.split()[0]

    When key_function is omitted, ``record.id`` is used, on the
    assumption that the records have unique ids. A ValueError is raised
    on duplicate keys.

    The returned dictionary preserves record order: dicts keep insertion
    order since Python 3.7 (CPython 3.6), and an OrderedDict is used on
    older Pythons so the guarantee always holds.

    All records are held in memory, so for very large inputs consider
    the Bio.SeqIO.index() function instead (if it supports your file
    format).
    """
    if key_function is None:
        key_function = lambda rec: rec.id

    mapping = _dict()
    for record in sequences:
        record_key = key_function(record)
        if record_key in mapping:
            raise ValueError("Duplicate key '%s'" % record_key)
        mapping[record_key] = record
    return mapping
Example #18
0
def assemble_data(data_list):
    """Build a data dictionary out of a list of entries and data dicts

    Given a list named tuples of dictionaries, :func:`assemble_data`
    returns a nested ordered dictionary with data keys as outer keys and
    tuple names as inner keys. The returned dictionary can be printed in
    tabular format by :func:`assemble_table`.

    .. seealso::
        :class:`Data`, :func:`assemble_table`

    Examples
    --------
    >>> from mlens.metrics import assemble_data, assemble_table
    >>> d = [('row-idx-1.row-idx-2.a.b', {'column-1': 0.1, 'column-2': 0.1})]
    >>> print(assemble_table(assemble_data(d)))
                            column-2-m  column-2-s  column-1-m  column-1-s
    row-idx-1  row-idx-2          0.10        0.00        0.10        0.00
    """
    # data: final {metric-m/-s: {name: aggregate}}; tmp: raw value lists.
    data = _dict()
    tmp = _dict()

    # NOTE(review): _get_partitions presumably reports whether names
    # carry partition indices -- defined elsewhere, confirm semantics.
    partitions = _get_partitions(data_list)

    # Collect scores per preprocessing case and estimator(s)
    for name, data_dict in data_list:
        if not data_dict:
            continue

        prefix, name = _split(name, '/', a_s='/')

        # Names are either est.i.j or case.est.i.j
        splitted = name.split('.')
        if partitions:
            name = tuple(splitted[:-1])

            if len(name) == 3:
                name = '%s.%s--%s' % name
            else:
                name = '%s--%s' % name
        else:
            name = '.'.join(splitted[:-2])

        name = '%s%s' % (prefix, name)

        if name not in tmp:
            # Set up data struct for name
            tmp[name] = _dict()
            for k in data_dict.keys():
                tmp[name][k] = list()
                if '%s-m' % k not in data:
                    data['%s-m' % k] = _dict()
                    data['%s-s' % k] = _dict()
                data['%s-m' % k][name] = list()
                data['%s-s' % k][name] = list()

        # collect all data dicts belonging to name
        for k, v in data_dict.items():
            tmp[name][k].append(v)

    # Aggregate to get mean and std
    for name, data_dict in tmp.items():
        for k, v in data_dict.items():
            if not v:
                continue
            try:
                # Purge None values from the main est due to no predict times
                v = [i for i in v if i is not None]
                if v:
                    data['%s-m' % k][name] = np.mean(v)
                    data['%s-s' % k][name] = np.std(v)
            except Exception as exc:
                # Best-effort: a failed aggregation leaves the empty list
                # in place and only warns, so the table can still print.
                warnings.warn(
                    "Aggregating data for %s failed. Raw data:\n%r\n"
                    "Details: %r" % (k, v, exc), MetricWarning)

    # Check if there are empty columns
    discard = list()
    for key, data_dict in data.items():
        empty = True
        for val in data_dict.values():
            if val or val == 0:
                empty = False
        if empty:
            discard.append(key)
    for key in discard:
        data.pop(key)
    return data
Example #19
0
def to_dict(sequences, key_function=None):
    """Turn a sequence iterator or list into a dictionary.

    Arguments:
     - sequences  - An iterator that returns SeqRecord objects,
       or simply a list of SeqRecord objects.
     - key_function - Optional callback function which when given a
       SeqRecord should return a unique key for the dictionary.
       If omitted, record.id is used on the assumption that the
       records returned have unique ids.

    Raises ValueError if two records map to the same key.

    Key order follows the input record order: since Python 3.7 the
    default dict class preserves insertion order (CPython already did in
    3.6), and as of Biopython 1.73 an OrderedDict is used explicitly on
    older interpreters, so the record order can always be assumed
    preserved.

    Example usage, defaulting to using the record.id as key:

    >>> from Bio import SeqIO
    >>> filename = "GenBank/cor6_6.gb"
    >>> format = "genbank"
    >>> id_dict = SeqIO.to_dict(SeqIO.parse(filename, format))
    >>> print(list(id_dict))
    ['X55053.1', 'X62281.1', 'M81224.1', 'AJ237582.1', 'L31939.1', 'AF297471.1']
    >>> print(id_dict["L31939.1"].description)
    Brassica rapa (clone bif72) kin mRNA, complete cds

    A more complex example, using the key_function argument in order to
    use a sequence checksum as the dictionary key:

    >>> from Bio import SeqIO
    >>> from Bio.SeqUtils.CheckSum import seguid
    >>> filename = "GenBank/cor6_6.gb"
    >>> format = "genbank"
    >>> seguid_dict = SeqIO.to_dict(SeqIO.parse(filename, format),
    ...               key_function = lambda rec : seguid(rec.seq))
    >>> for key, record in sorted(seguid_dict.items()):
    ...     print("%s %s" % (key, record.id))
    /wQvmrl87QWcm9llO4/efg23Vgg AJ237582.1
    BUg6YxXSKWEcFFH0L08JzaLGhQs L31939.1
    SabZaA4V2eLE9/2Fm5FnyYy07J4 X55053.1
    TtWsXo45S3ZclIBy4X/WJc39+CY M81224.1
    l7gjJFE6W/S1jJn5+1ASrUKW/FA X62281.1
    uVEYeAQSV5EDQOnFoeMmVea+Oow AF297471.1

    This approach is not suitable for very large sets of sequences, as
    every SeqRecord is held in memory. Instead, consider using the
    Bio.SeqIO.index() function (if it supports your particular file
    format).
    """
    if key_function is None:
        # Named default (rather than a lambda) for clearer tracebacks.
        def key_function(rec):
            return rec.id

    mapping = _dict()
    for rec in sequences:
        rec_key = key_function(rec)
        if rec_key in mapping:
            raise ValueError("Duplicate key '%s'" % rec_key)
        mapping[rec_key] = rec
    return mapping
Example #20
0
    def _parseFile(self, file_path, ignoreCategories, preserve_token_order, onlyCategories):
        """Parse the mmCIF file at ``file_path`` into a dictionary.

        Returns ``{data_heading: {category: {item: value-or-list}}}``.
        On KeyError/IOError the error is printed and None is returned
        implicitly.

        ``ignoreCategories``/``onlyCategories`` filter which categories
        are kept; ``preserve_token_order`` selects OrderedDict storage.
        """

        # Pick the mapping type once: an OrderedDict (stdlib, the PyPI
        # backport, or the bundled copy) when token order must be kept,
        # otherwise a plain dict.
        if preserve_token_order:
            try:
                from collections import OrderedDict as _dict
            except ImportError:
                # fallback: try to use the ordereddict backport when using python 2.6
                try:
                    from ordereddict import OrderedDict as _dict
                except ImportError:
                    # backport not installed: use local OrderedDict
                    from mmCif.ordereddict import OrderedDict as _dict
        else:
            _dict = dict


        mmcif_like_file = _dict()  # data_heading -> data_block
        data_block = _dict()       # category -> {item: value(s)} of current block
        save_block = _dict()       # NOTE(review): never used in this method body

        data_heading = ""
        line_num = 0  # physical line counter, used only for error reports
        try:
            with openGzip(file_path, 'r') as f1:
                table_names = []         # item names declared by the current loop_
                table_values = []        # values tokenized from the current line(s)
                table_values_array = []  # flat row-major buffer of loop values
                isLoop = False
                multiLineValue = False
                skipCategory = False
                for line in f1:
                    line_num+=1
                    # When the previous category was filtered out, fast-forward
                    # past its remaining lines until the next tag / loop_ /
                    # save_ / data_ boundary.
                    if skipCategory:
                        flag = False
                        while line:
                            check = (line.strip().startswith('_') or
                                self.loopRE.match(line.strip()[:5]) or
                                self.saveRE.match(line.strip()[:5]) or
                                self.dataRE.match(line.strip()[:5]))
                            if flag:
                                if check:
                                    isLoop = False
                                    break
                            else:
                                if not check:
                                    flag = True
                            if not (self.saveRE.match(line.strip()[:5]) or
                                self.dataRE.match(line.strip()[:5])):
                                try:
                                    line = next(f1)
                                    line_num+=1
                                except StopIteration:
                                    break
                            else:
                                break
                        skipCategory = False

                    # A loop_ ends when a new tag or loop_ starts: distribute
                    # the buffered values column-wise over the declared items.
                    if isLoop is True and table_values_array != [] and (self.loopRE.match(line) is not None or (line.strip().startswith('_'))):
                        isLoop = False
                        num_item = len(table_names)
                        if len(table_values_array) % num_item != 0:
                            raise MMCIFWrapperSyntaxError(category)
                        for val_index, item in enumerate(table_names):
                            data_block[category][item] = table_values_array[val_index::num_item]
                        table_values_array = []

                    # Skip blank lines and full-line comments.
                    if line.strip() == "":
                        continue
                    if line.startswith('#'):
                        continue
                    # Strip a trailing '#' comment, unless the line is a
                    # semicolon-delimited text field where '#' is literal.
                    if '\t#' in line or ' #' in line and not line.startswith(';'):
                        new_line = ''
                        for tok in self.dataValueRE.findall(line):
                            if not tok.startswith('#'):
                                new_line += tok+" "
                            else:
                                break
                        # make sure to preserve the fact that ';' was not the first character
                        line = new_line if not new_line.startswith(';') else " "+new_line
                        # Fails for entries "3snv", "1kmm", "1ser", "2prg", "3oqd"
                        # line = re.sub(r'\s#.*$', '', line)
                    # Accumulate a whole ';'-delimited multi-line value into
                    # `line` before any further dispatch.
                    if line.startswith(';'):
                        while '\n;' not in line:
                            try:
                                line += next(f1)
                                line_num+=1
                            except StopIteration:
                                break
                        multiLineValue = True
                    if self.dataRE.match(line):
                        # New data_ heading: flush any unfinished loop, store
                        # the previous block, then start a fresh one.
                        if data_block != {}:
                            if table_values_array != []:
                                isLoop = False
                                num_item = len(table_names)
                                if len(table_values_array) % num_item != 0:
                                    raise mmCifSyntaxError(category)
                                for val_index, item in enumerate(table_names):
                                    data_block[category][item] = table_values_array[val_index::num_item]
                                table_names = []
                                table_values_array = []
                            mmcif_like_file[data_heading] = data_block
                            data_block = _dict()
                        data_heading = self.dataRE.match(line).group('data_heading')
                    elif self.saveRE.match(line):
                        # Save frames are skipped entirely (up to 'save_').
                        while line.strip() != 'save_':
                            try:
                                line = next(f1)
                                line_num+=1
                            except StopIteration:
                                break
                        continue
                    elif self.loopRE.match(line):
                        # Save and clear the table_values_array buffer from the
                        # previous loop that was read
                        if table_values_array != []:
                            for itemIndex, name in enumerate(table_names):
                                data_block[category].update({name:[row[itemIndex] for row in table_values_array]})
                            table_values_array = []
                        isLoop = True
                        category, item, value = None, None, None
                        #Stores items of a category listed in loop blocks
                        table_names = []
                        #Stores values of items in a loop as a single row
                        table_values = []
                    elif self.dataNameRE.match(line):
                        # Match category and item simultaneously
                        m = self.dataNameRE.match(line)
                        category = m.group('data_category')
                        item = m.group('category_item')
                        remainder = m.group('remainder')
                        value = None
                        if isLoop and remainder != '':
                            """Append any data values following the last loop
                            category.item tag should any exist"""
                            table_values += self._tokenizeData(remainder)
                            line = ''
                        else:
                            line = remainder + "\n"
                        if not isLoop:
                            # Simple item: value may be inline, on the next
                            # line, or a ';'-delimited multi-line field.
                            if line.strip() != '':
                                value = self._tokenizeData(line)
                            else:
                                # For cases where values are on the following
                                # line
                                try:
                                    line = next(f1)
                                    line_num +=1
                                except StopIteration:
                                    break
                            while value is None:
                                char_start = 1 if line.startswith(';') else 0
                                # Pull in the rest of a still-open ';' field.
                                while line.startswith(';') and not line.rstrip().endswith('\n;'):
                                    try:
                                        line += next(f1)
                                        line_num+=1
                                    except StopIteration:
                                        break
                                # NOTE(review): this assignment is always
                                # overwritten by one of the two branches below.
                                value = (line[char_start:line.rfind('\n;')]).strip()
                                if char_start > 0:
                                    value = (line[char_start:line.rfind('\n;')]).strip()
                                else:
                                    value = self._tokenizeData(" "+line)
                            if (ignoreCategories and category in ignoreCategories) or (onlyCategories and category not in onlyCategories):
                                pass
                            else:
                                if category in data_block:
                                    data_block[category].update({item: value if len(value) > 1 else value[0]})
                                else:
                                    data_block.setdefault(category, _dict({item: value if len(value) > 1 else value[0]})) # OrderedDict here preserves item order
                        else:
                            # Loop header: either skip the whole category or
                            # register the item as a loop column.
                            if (ignoreCategories and category in ignoreCategories) or (onlyCategories and category not in onlyCategories):
                                skipCategory = True
                            else:
                                data_block.setdefault(category, _dict()) # OrderedDict here preserves item order
                                table_names.append(item)
                    else:
                        # Plain data line(s): values belonging to the current
                        # loop_ buffer.
                        if multiLineValue is True:
                            table_values.append((line[1:line.rfind('\n;')]).strip())
                            multiLineValue = False
                            line = line[line.rfind('\n;') + 2:]
                            if line.strip() != '':
                                table_values += self._tokenizeData(line)
                        else:
                            table_values += self._tokenizeData(line)

                        if table_values != []:
                            table_values_array += table_values
                            table_values = []
                # EOF: flush a trailing, still-open loop_ and the last block.
                if isLoop is True and table_values_array != []:
                    isLoop = False
                    num_item = len(table_names)
                    for val_index, item in enumerate(table_names):
                        data_block[category][item] = table_values_array[val_index::num_item]
                    table_values_array = []
                if data_block != {}:
                    mmcif_like_file[data_heading] = data_block
            return mmcif_like_file
        except KeyError as key_err:
            # Report and fall through (returns None implicitly).
            print("KeyError [line %i]: %s" %(line_num, str(key_err)))
        except IOError as io_err:
            print("IOException [line %i]: %s" % (line_num, str(io_err)))
Example #21
0
class Parameters(_dict):
    """An (ordered) dict whose items can also be read/written as attributes.

    Keys that collide with dict attribute names stay protected: plain-dict
    names cannot be set as attributes at all, while OrderedDict-only names
    fall through to normal attribute assignment.
    """

    # Every attribute name inherited from the (ordered) dict base class.
    __reserved_names__ = dir(_dict())  # Also from OrderedDict
    # The subset of names that a plain dict exposes as well.
    __pure_names__ = dir(dict())

    # No per-instance __dict__; all state lives in the dict itself.
    __slots__ = []

    def __repr__(self):
        ident = []
        nonident = []
        for k, v in self.items():
            # Identifier-style keys render as keyword arguments.
            target = ident if isidentifier(k) else nonident
            fmt = '%s=%r' if isidentifier(k) else '(%r, %r)'
            target.append(fmt % (k, v))
        if nonident:
            return 'Parameters([%s], %s)' % (', '.join(nonident),
                                       ', '.join(ident))
        return 'Parameters(%s)' % (', '.join(ident))

    def __str__(self):
        # Widest key determines the right-aligned key column.
        width = 0
        for k in self:
            if len(k) > width:
                width = len(k)

        # Characters remaining for the value on a <80-char line.
        room = 79 - (width + 6)

        text = '<%i parameters>\n' % len(self)
        for k in self.keys():
            shown = repr(self[k])
            if len(shown) > room:
                # Truncate long reprs with an ellipsis.
                shown = shown[:room-3] + '...'
            text += k.rjust(width+4) + ": %s\n" % (shown)
        return text

    def __getattribute__(self, key):
        try:
            return object.__getattribute__(self, key)
        except AttributeError:
            # Fall back to item lookup; re-raise when the key is absent too.
            if key not in self:
                raise
            return self[key]

    def __setattr__(self, key, val):
        if key not in self.__class__.__reserved_names__:
            # Ordinary name: store as a dict item (no copy, direct assign).
            self[key] = val
        elif key not in self.__class__.__pure_names__:
            # OrderedDict-only attribute: let the base class handle it.
            return _dict.__setattr__(self, key, val)
        else:
            raise AttributeError('Reserved name, this key can only ' +
                                 'be set via ``d[%r] = X``' % key)

    def __dir__(self):
        # Attributes plus all identifier-style keys.
        keys = [k for k in self.keys() if isidentifier(k)]
        return self.__class__.__reserved_names__ + keys
Example #22
0
    def _parseFile(self, file_path, ignoreCategories, preserve_token_order,
                   onlyCategories):
        """Parse the mmCIF/STAR file at ``file_path`` into a dictionary.

        Returns ``{data_heading: {category: {item: value-or-list}}}``.
        On KeyError/IOError the error is printed and None is returned
        implicitly.

        ``ignoreCategories``/``onlyCategories`` filter which categories
        are kept; ``preserve_token_order`` selects OrderedDict storage.
        """

        # Pick the mapping type once: an OrderedDict (stdlib, the PyPI
        # backport, or the bundled copy) when token order must be kept,
        # otherwise a plain dict.
        if preserve_token_order:
            try:
                from collections import OrderedDict as _dict
            except ImportError:
                # fallback: try to use the ordereddict backport when using python 2.6
                try:
                    from ordereddict import OrderedDict as _dict
                except ImportError:
                    # backport not installed: use local OrderedDict
                    from mmCif.ordereddict import OrderedDict as _dict
        else:
            _dict = dict

        mmcif_like_file = _dict()  # data_heading -> data_block
        data_block = _dict()       # category -> {item: value(s)} of current block
        save_block = _dict()       # NOTE(review): never used in this method body

        data_heading = ""
        line_num = 0  # physical line counter, used only for error reports
        try:
            with openGzip(file_path, "rt") as f1:
                table_names = []         # item names declared by the current loop_
                table_values = []        # values tokenized from the current line(s)
                table_values_array = []  # flat row-major buffer of loop values
                isLoop = False
                multiLineValue = False
                skipCategory = False
                for line in f1:
                    line_num += 1
                    # When the previous category was filtered out, fast-forward
                    # past its remaining lines until the next tag / loop_ /
                    # save_ / data_ boundary.
                    if skipCategory:
                        flag = False
                        while line:
                            check = (line.strip().startswith("_")
                                     or self.loopRE.match(line.strip()[:5])
                                     or self.saveRE.match(line.strip()[:5])
                                     or self.dataRE.match(line.strip()[:5]))
                            if flag:
                                if check:
                                    isLoop = False
                                    break
                            else:
                                if not check:
                                    flag = True
                            if not (self.saveRE.match(line.strip()[:5])
                                    or self.dataRE.match(line.strip()[:5])):
                                try:
                                    line = next(f1)
                                    line_num += 1
                                except StopIteration:
                                    break
                            else:
                                break
                        skipCategory = False

                    # A loop_ ends when a new tag or loop_ starts: distribute
                    # the buffered values column-wise over the declared items.
                    if (isLoop is True and table_values_array != []
                            and (self.loopRE.match(line) is not None or
                                 (line.strip().startswith("_")))):
                        isLoop = False
                        num_item = len(table_names)
                        if len(table_values_array) % num_item != 0:
                            raise MMCIFWrapperSyntaxError(category)
                        for val_index, item in enumerate(table_names):
                            data_block[category][item] = table_values_array[
                                val_index::num_item]
                        table_values_array = []

                    # Skip blank lines and full-line comments.
                    if line.strip() == "":
                        continue
                    if line.startswith("#"):
                        continue
                    # Strip a trailing '#' comment, unless the line is a
                    # semicolon-delimited text field where '#' is literal.
                    if "\t#" in line or " #" in line and not line.startswith(
                            ";"):
                        new_line = ""
                        for tok in self.dataValueRE.findall(line):
                            if not tok.startswith("#"):
                                new_line += tok + " "
                            else:
                                break
                        # make sure to preserve the fact that ';' was not the first character
                        line = (new_line if not new_line.startswith(";") else
                                " " + new_line)
                        # Fails for entries "3snv", "1kmm", "1ser", "2prg", "3oqd"
                        # line = re.sub(r'\s#.*$', '', line)
                    # Accumulate a whole ';'-delimited multi-line value into
                    # `line` before any further dispatch.
                    if line.startswith(";"):
                        while "\n;" not in line:
                            try:
                                line += next(f1)
                                line_num += 1
                            except StopIteration:
                                break
                        multiLineValue = True
                    if self.dataRE.match(line):
                        # New data_ heading: flush any unfinished loop, store
                        # the previous block, then start a fresh one.
                        if data_block != {}:
                            if table_values_array != []:
                                isLoop = False
                                num_item = len(table_names)
                                if len(table_values_array) % num_item != 0:
                                    raise mmCifSyntaxError(category)
                                for val_index, item in enumerate(table_names):
                                    data_block[category][
                                        item] = table_values_array[
                                            val_index::num_item]
                                table_names = []
                                table_values_array = []
                            mmcif_like_file[data_heading] = data_block
                            data_block = _dict()
                        data_heading = self.dataRE.match(line).group(
                            "data_heading")
                    elif self.saveRE.match(line):
                        # Save frames are skipped entirely (up to 'save_').
                        while line.strip() != "save_":
                            try:
                                line = next(f1)
                                line_num += 1
                            except StopIteration:
                                break
                        continue
                    elif self.loopRE.match(line):
                        # Save and clear the table_values_array buffer from the
                        # previous loop that was read
                        if table_values_array != []:
                            for itemIndex, name in enumerate(table_names):
                                data_block[category].update({
                                    name: [
                                        row[itemIndex]
                                        for row in table_values_array
                                    ]
                                })
                            table_values_array = []
                        isLoop = True
                        category, item, value = None, None, None
                        # Stores items of a category listed in loop blocks
                        table_names = []
                        # Stores values of items in a loop as a single row
                        table_values = []
                    elif self.dataNameRE.match(line):
                        # Two step process: STAR does not know the concept of
                        # categories, so split "category.item" when possible.
                        m = self.dataNameRE.match(line)
                        flag = m.group("data_category")

                        tmp_category = self.dataCategoryItem.match(flag)
                        if tmp_category:
                            category = tmp_category.group("data_category")
                            item = tmp_category.group("category_item")
                        else:
                            # Plain STAR tag: no category part.
                            category = ""
                            item = flag

                        remainder = m.group("remainder")
                        value = None
                        if isLoop and remainder != "":
                            """Append any data values following the last loop
                            category.item tag should any exist"""
                            table_values += self._tokenizeData(remainder)
                            line = ""
                        else:
                            line = remainder + "\n"
                        if not isLoop:
                            # Simple item: value may be inline, on the next
                            # line, or a ';'-delimited multi-line field.
                            if line.strip() != "":
                                value = self._tokenizeData(line)
                            else:
                                # For cases where values are on the following
                                # line
                                try:
                                    line = next(f1)
                                    line_num += 1
                                except StopIteration:
                                    break
                            while value is None:
                                char_start = 1 if line.startswith(";") else 0
                                # Pull in the rest of a still-open ';' field.
                                while line.startswith(";") and not line.rstrip(
                                ).endswith("\n;"):
                                    try:
                                        line += next(f1)
                                        line_num += 1
                                    except StopIteration:
                                        break
                                # NOTE(review): this assignment is always
                                # overwritten by one of the two branches below.
                                value = (line[char_start:line.rfind("\n;")]
                                         ).strip()
                                if char_start > 0:
                                    value = (line[char_start:line.rfind("\n;")]
                                             ).strip()
                                else:
                                    value = self._tokenizeData(" " + line)
                            if (ignoreCategories
                                    and category in ignoreCategories) or (
                                        onlyCategories
                                        and category not in onlyCategories):
                                pass
                            else:
                                if category in data_block:
                                    data_block[category].update({
                                        item:
                                        value if len(value) > 1 else value[0]
                                    })
                                else:
                                    data_block.setdefault(
                                        category,
                                        _dict({
                                            item:
                                            value
                                            if len(value) > 1 else value[0]
                                        }),
                                    )  # OrderedDict here preserves item order
                        else:
                            # Loop header: either skip the whole category or
                            # register the item as a loop column.
                            if (ignoreCategories
                                    and category in ignoreCategories) or (
                                        onlyCategories
                                        and category not in onlyCategories):
                                skipCategory = True
                            else:
                                data_block.setdefault(category, _dict(
                                ))  # OrderedDict here preserves item order
                                table_names.append(item)
                    else:
                        # Plain data line(s): values belonging to the current
                        # loop_ buffer.
                        if multiLineValue is True:
                            table_values.append(
                                (line[1:line.rfind("\n;")]).strip())
                            multiLineValue = False
                            line = line[line.rfind("\n;") + 2:]
                            if line.strip() != "":
                                table_values += self._tokenizeData(line)
                        else:
                            table_values += self._tokenizeData(line)

                        if table_values != []:
                            table_values_array += table_values
                            table_values = []
                # EOF: flush a trailing, still-open loop_ and the last block.
                if isLoop is True and table_values_array != []:
                    isLoop = False
                    num_item = len(table_names)
                    for val_index, item in enumerate(table_names):
                        data_block[category][item] = table_values_array[
                            val_index::num_item]
                    table_values_array = []
                if data_block != {}:
                    mmcif_like_file[data_heading] = data_block
            return mmcif_like_file
        except KeyError as key_err:
            # Report and fall through (returns None implicitly).
            print("KeyError [line %i]: %s" % (line_num, str(key_err)))
        except IOError as io_err:
            print("IOException [line %i]: %s" % (line_num, str(io_err)))
Example #23
0
                continue

            layer, k = _split(dat_key, '/')
            case, k = _split(k, '.')
            est, part = _split(k, '--', reverse=True)

            # Header space before column headings
            items = [i for i in [layer, case, est, part] if i != '']
            buffer = max(buffer, len('  '.join(items)))

            for k, v in zip(row_glossary, [layer, case, est, part]):
                v_ = len(v)
                if v_ > max_row_len[k]:
                    max_row_len[k] = v_

            dat = _dict()
            dat['layer'] = layer
            dat['case'] = case
            dat['est'] = est
            dat['part'] = part
            row_keys.append(dat_key)
            rows.append(dat)

    # Check which row name columns we can drop (ex partition number)
    drop = list()
    for k, v in max_row_len.items():
        if v == 0:
            drop.append(k)

    # Header
    out = " " * (buffer + padding)
Example #24
0
#
#   Lexer rule table. Every entry has the shape
#
#       regexes[(a, b)] = (c, d)
#
#   where
#   regexes = an ordered dictionary, so the regular expressions are always
#             processed in the same order
#   a = a unique number - start at 0 and increase by one per lexer
#   b = color tuple (r, g, b), e.g. (255, 0, 0) for red
#   c = raw string with the regular expression, e.g. r'\w+'
#   d = integer denoting which match group should be colored

# Builtin lexer rules for Python source, built table-driven so the rule
# list reads as one literal; insertion order defines processing order.
py_regexes = _dict()
for _key, _rule in (
    # cls and self objects - return match 0
    ((0, (224, 108, 117)), (r'\b(cls|self)\b', 0)),
    # function parameters - return match 1
    ((1, (209, 154, 102)), (r'(?:(?:def)\s\w+)\s*\((.+)\):', 1)),
    # args and kwargs - return match 0
    ((2, (86, 182, 194)), (r'(\*|\*\*)(?=\w)', 0)),
    # functions and class instances but not definitions - return match 1
    ((3, (79, 175, 239)),
     (r'class\s*\w+?(?=\()|def\s*\w+?(?=\()|(\w+?(?=\())', 1)),
    # dunder functions and special keywords - return match 0
    ((4, (86, 182, 194)),
     (r'\b(editor|editor1|editor2|notepad|console|__\w+__|super|object|type|print)\b',
      0)),
):
    py_regexes[_key] = _rule
# Keep the module namespace clean of loop temporaries.
del _key, _rule
Example #25
0
def _emit(key,
          value,
          content_handler,
          attr_prefix='@',
          cdata_key='#text',
          depth=0,
          preprocessor=None,
          pretty=False,
          newl='\n',
          indent='\t',
          namespace_separator=':',
          namespaces=None,
          full_document=True,
          expand_iter=None):
    """Recursively serialize *value* as XML element(s) named *key*.

    Emits SAX events (``startElement`` / ``characters`` / ``endElement``,
    plus ``ignorableWhitespace`` when *pretty* is set) on *content_handler*.
    Within a dict value, keys starting with *attr_prefix* become XML
    attributes, the *cdata_key* entry becomes character data, and every
    other key becomes a child element handled by a recursive call.

    Parameters
    ----------
    key : str
        Element name; passed through ``_process_namespace`` first.
    value : object
        dict, string, bool, None, or an iterable of these; a non-dict,
        non-string iterable is emitted as a sequence of sibling elements.
    content_handler : object
        SAX-style handler receiving the events.
    attr_prefix, cdata_key : str
        Markers distinguishing attributes / text from child elements.
    depth : int
        Current nesting level; drives pretty-print indentation.
    preprocessor : callable or None
        Optional hook ``(key, value) -> (key, value)``; returning None
        skips the element entirely.
    pretty, newl, indent
        Pretty-printing switches and whitespace strings.
    namespace_separator, namespaces
        Namespace expansion controls, forwarded to ``_process_namespace``.
    full_document : bool
        When True, a second element at depth 0 raises ValueError.
    expand_iter : str or None
        If set, a nested non-string iterable is wrapped as
        ``{expand_iter: iterable}`` so it re-emits as repeated children.

    Raises
    ------
    ValueError
        If *full_document* is true and multiple roots are produced.
    """
    key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
    if preprocessor is not None:
        result = preprocessor(key, value)
        if result is None:
            return
        key, value = result
    # Normalize scalars and single dicts to a one-element list so the loop
    # below can treat everything as a sequence of sibling elements.
    if (not hasattr(value, '__iter__') or isinstance(value, _basestring)
            or isinstance(value, dict)):
        value = [value]
    for index, v in enumerate(value):
        if full_document and depth == 0 and index > 0:
            raise ValueError('document with multiple roots')
        # Coerce each item into dict form: None -> empty element,
        # bool -> 'true'/'false' text, other scalars -> text.
        if v is None:
            v = _dict()
        elif isinstance(v, bool):
            if v:
                v = _unicode('true')
            else:
                v = _unicode('false')
        elif not isinstance(v, dict):
            if expand_iter and hasattr(
                    v, '__iter__') and not isinstance(v, _basestring):
                v = _dict(((expand_iter, v), ))
            else:
                v = _unicode(v)
        if isinstance(v, _basestring):
            v = _dict(((cdata_key, v), ))
        # Split the dict into character data, XML attributes and children.
        cdata = None
        attrs = _dict()
        children = []
        for ik, iv in v.items():
            if ik == cdata_key:
                cdata = iv
                continue
            if ik.startswith(attr_prefix):
                ik = _process_namespace(ik, namespaces, namespace_separator,
                                        attr_prefix)
                if ik == '@xmlns' and isinstance(iv, dict):
                    # NOTE(review): this inner loop rebinds the outer loop
                    # variable ``v``; harmless here because ``v`` is not
                    # read again after this point, but fragile.
                    for k, v in iv.items():
                        attr = 'xmlns{}'.format(':{}'.format(k) if k else '')
                        attrs[attr] = _unicode(v)
                    continue
                if not isinstance(iv, _unicode):
                    iv = _unicode(iv)
                attrs[ik[len(attr_prefix):]] = iv
                continue
            children.append((ik, iv))
        if pretty:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.startElement(key, AttributesImpl(attrs))
        if pretty and children:
            content_handler.ignorableWhitespace(newl)
        # Children are emitted before cdata, each one level deeper.
        for child_key, child_value in children:
            _emit(child_key,
                  child_value,
                  content_handler,
                  attr_prefix,
                  cdata_key,
                  depth + 1,
                  preprocessor,
                  pretty,
                  newl,
                  indent,
                  namespaces=namespaces,
                  namespace_separator=namespace_separator,
                  expand_iter=expand_iter)
        if cdata is not None:
            content_handler.characters(cdata)
        if pretty and children:
            content_handler.ignorableWhitespace(depth * indent)
        content_handler.endElement(key)
        if pretty and depth:
            content_handler.ignorableWhitespace(newl)
Example #26
0
 def __repr__(self):
     """Return ``ClassName({...})`` with the method/url attrs as the body.

     Relies on ``separators=',='`` unpacking to ``(',', '=')`` inside
     json.dumps, and on ``default=repr`` to keep any non-JSON attribute
     values printable.
     """
     fields = _dict([(name, getattr(self, name)) for name in ['method', 'url']])
     body = json.dumps(fields, default=repr, separators=',=')
     return "%s(%s)" % (self.__class__.__name__, body)
Example #27
0
class Dict(_dict):
    """Dictionary whose items are also reachable as attributes.

    A lean way to represent structured data that plays well with
    autocompletion.  Any key may be stored, but only keys that are valid
    identifiers and not existing dict/OrderedDict method names (e.g.
    'items', 'copy') can be read or written via attribute access; other
    keys must use the classic ``d[key]`` syntax.

    Example:

    .. code-block:: python

        >> d = Dict(foo=3)
        >> d.foo
        3
        >> d['foo'] = 4
        >> d.foo
        4
        >> d.bar = 5
        >> d.bar
        5
    """

    # Names that attribute assignment must not clobber.  __reserved_names__
    # includes OrderedDict extras; __pure_names__ is plain-dict only.
    __reserved_names__ = dir(_dict())  # Also from OrderedDict
    __pure_names__ = dir(dict())

    # No instance __dict__: all state lives in the mapping itself.
    __slots__ = []

    def __repr__(self):
        # Identifier keys render as keyword args, the rest as pairs.
        kwarg_parts = []
        pair_parts = []
        for key, val in self.items():
            if isidentifier(key):
                kwarg_parts.append('%s=%r' % (key, val))
            else:
                pair_parts.append('(%r, %r)' % (key, val))
        if pair_parts:
            return 'Dict([%s], %s)' % (', '.join(pair_parts),
                                       ', '.join(kwarg_parts))
        return 'Dict(%s)' % (', '.join(kwarg_parts))

    def __getattribute__(self, key):
        # Real attributes/methods win; fall back to item lookup.
        try:
            return object.__getattribute__(self, key)
        except AttributeError:
            if key not in self:
                raise
            return self[key]

    def __setattr__(self, key, val):
        if key not in Dict.__reserved_names__:
            # Store as an item (no Dict(val) coercion: that would copy).
            self[key] = val
        elif key not in Dict.__pure_names__:
            # OrderedDict-specific name: let the base class handle it.
            return _dict.__setattr__(self, key, val)
        else:
            raise AttributeError('Reserved name, this key can only ' +
                                 'be set via ``d[%r] = X``' % key)

    def __dir__(self):
        # Expose identifier-shaped keys for autocompletion.
        attr_keys = [k for k in self.keys() if isidentifier(k)]
        return Dict.__reserved_names__ + attr_keys