Example #1
1
class DotMap(OrderedDict):
    def __init__(self, *args, **kwargs):
        self._map = OrderedDict()
        if args:
            d = args[0]
            if type(d) is dict:
                for k, v in self.__call_items(d):
                    if type(v) is dict:
                        v = DotMap(v)
                    self._map[k] = v
        if kwargs:
            for k, v in self.__call_items(kwargs):
                self._map[k] = v

    def __call_items(self, obj):
        if hasattr(obj, "iteritems") and ismethod(getattr(obj, "iteritems")):
            return obj.iteritems()
        else:
            return obj.items()

    def items(self):
        return self.iteritems()

    def iteritems(self):
        return self.__call_items(self._map)

    def __iter__(self):
        return self._map.__iter__()

    def next(self):
        return self._map.next()

    def __setitem__(self, k, v):
        self._map[k] = v

    def __getitem__(self, k):
        if k not in self._map:
            # automatically extend to new DotMap
            self[k] = DotMap()
        return self._map[k]

    def __setattr__(self, k, v):
        if k == "_map":
            super(DotMap, self).__setattr__(k, v)
        else:
            self[k] = v

    def __getattr__(self, k):
        if k == "_map":
            super(DotMap, self).__getattr__(k)
        else:
            return self[k]

    def __delattr__(self, key):
        return self._map.__delitem__(key)

    def __contains__(self, k):
        return self._map.__contains__(k)

    def __str__(self):
        items = []
        for k, v in self.__call_items(self._map):
            items.append("{0}={1}".format(k, repr(v)))
        out = "DotMap({0})".format(", ".join(items))
        return out

    def __repr__(self):
        return str(self)

    def toDict(self):
        d = {}
        for k, v in self.items():
            if type(v) is DotMap:
                v = v.toDict()
            d[k] = v
        return d

    def pprint(self):
        pprint(self.toDict())

        # proper dict subclassing

    def values(self):
        return self._map.values()

    @classmethod
    def parseOther(self, other):
        if type(other) is DotMap:
            return other._map
        else:
            return other

    def __cmp__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__cmp__(other)

    def __eq__(self, other):
        other = DotMap.parseOther(other)
        if not isinstance(other, dict):
            return False
        return self._map.__eq__(other)

    def __ge__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__ge__(other)

    def __gt__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__gt__(other)

    def __le__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__le__(other)

    def __lt__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__lt__(other)

    def __ne__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__ne__(other)

    def __delitem__(self, key):
        return self._map.__delitem__(key)

    def __len__(self):
        return self._map.__len__()

    def clear(self):
        self._map.clear()

    def copy(self):
        return self

    def get(self, key, default=None):
        return self._map.get(key, default)

    def has_key(self, key):
        return key in self._map

    def iterkeys(self):
        return self._map.iterkeys()

    def itervalues(self):
        return self._map.itervalues()

    def keys(self):
        return self._map.keys()

    def pop(self, key, default=None):
        return self._map.pop(key, default)

    def popitem(self):
        return self._map.popitem()

    def setdefault(self, key, default=None):
        self._map.setdefault(key, default)

    def update(self, *args, **kwargs):
        if len(args) != 0:
            self._map.update(*args)
        self._map.update(kwargs)

    def viewitems(self):
        return self._map.viewitems()

    def viewkeys(self):
        return self._map.viewkeys()

    def viewvalues(self):
        return self._map.viewvalues()

    @classmethod
    def fromkeys(cls, seq, value=None):
        d = DotMap()
        d._map = OrderedDict.fromkeys(seq, value)
        return d
Example #2
1
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Combine MegaM files that \
                                                  contain features for the same\
                                                  files.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "megam_file",
        help="MegaM input file(s). Each feature line must be \
                              preceded by a comment with the filename/ID that \
                              the features should be joined on.",
        nargs="+",
    )
    parser.add_argument(
        "-b",
        "--binary",
        help="Converts all of the features in the specified \
                              range of files to presence/absence binary \
                              features. Files are numbered starting from 1, and\
                              if 0 is specified with this flag, all files are\
                              converted.",
        type=parse_num_list,
    )
    parser.add_argument(
        "--doubleup",
        help="Keep both the binary and numeric versions of any\
                              feature you convert to binary.",
        action="store_true",
    )
    parser.add_argument(
        "-c",
        "--common",
        help="Only output features for filenames that are \
                              common to all MegaM files.",
        action="store_true",
    )
    parser.add_argument("--version", action="version", version="%(prog)s {0}".format(__version__))
    args = parser.parse_args(argv)

    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=("%(asctime)s - %(name)s - %(levelname)s - " + "%(message)s"))
    logger = logging.getLogger(__name__)

    # Map from filenames to feature strings
    feature_dict = OrderedDict()
    class_dict = {}
    filename_set = set()

    # Set that will contain all of the features seen in previous files
    # (for duplicate detection)
    prev_feature_set = set()

    # Iterate through MegaM files
    for file_num, infile in enumerate(args.megam_file, start=1):
        # Initialize duplicate feature book-keeping variables
        curr_feature_set = set()

        # Initialize set for storing filenames mentioned in current MegaM file
        curr_filename_set = set()

        # Handle current MegaM file
        for curr_filename, class_name, feature_dict in _MegaMDictIter(infile):
            if curr_filename in class_dict:
                if class_dict[curr_filename] != class_name:
                    raise ValueError(
                        ("Inconsisten class label for instance " + "{} in {}.").format(curr_filename, infile.name)
                    )
            else:
                class_dict[curr_filename] = class_name
            # If there are non-zero features, process them
            if feature_dict:
                for feat_name, feat_val in iteritems(feature_dict):
                    # Handle duplicate features
                    feat_name = get_unique_name(feat_name, prev_feature_set, infile.name)
                    # Ignore zero-valued features
                    try:
                        if feat_val != "N/A" and float(feat_val) != 0:
                            # Convert feature to binary if necessary
                            if args.binary and ((args.binary == [0]) or (file_num in args.binary)):
                                if args.doubleup:
                                    new_feat_pair = "{} {} ".format(feat_name, feat_val)
                                    feature_dict[curr_filename] = (
                                        new_feat_pair
                                        if curr_filename not in feature_dict
                                        else feature_dict[curr_filename] + new_feat_pair
                                    )
                                    curr_feature_set.add(feat_name)
                                    feat_name = get_unique_name(feat_name + "_binary", prev_feature_set, infile.name)
                                feat_val = 1

                            # Add feature pair to current string of features
                            new_feat_pair = "{} {} ".format(feat_name, feat_val)
                            feature_dict[curr_filename] = (
                                new_feat_pair
                                if curr_filename not in feature_dict
                                else feature_dict[curr_filename] + new_feat_pair
                            )
                            curr_feature_set.add(feat_name)
                    except ValueError:
                        raise ValueError(
                            ("Invalid feature value in feature " + "pair '{} {}' for file {}")
                            .format(feat_name, feat_val, curr_filename)
                            .encode("utf-8")
                        )

            # Otherwise warn about lack of features (although that really
            # just means all of them have zero values)
            else:
                if curr_filename not in feature_dict:
                    feature_dict[curr_filename] = ""
                logger.warning(
                    ("No features found for {} in {}. All are " + "assumed to be zero.")
                    .format(curr_filename, infile.name)
                    .encode("utf-8")
                )

        # Add current file's features to set of seen features
        prev_feature_set.update(curr_feature_set)

        # Either intersect or union current file's filenames with existing ones
        if args.common and filename_set:
            filename_set.intersection_update(curr_filename_set)
        else:
            filename_set.update(curr_filename_set)

    # Print new MegaM file
    for curr_filename in feature_dict.viewkeys():
        # Skip files that aren't common when args.common is true
        if curr_filename not in filename_set:
            continue
        print("# {}".format(curr_filename).encode("utf-8"))
        print("{}\t{}".format(class_dict[curr_filename], feature_dict[curr_filename].strip()).encode("utf-8"))
Example #3
1
class DotMap(MutableMapping, OrderedDict):
    def __init__(self, *args, **kwargs):
        self._map = OrderedDict()
        self._dynamic = True
        if kwargs:
            if "_dynamic" in kwargs:
                self._dynamic = kwargs["_dynamic"]
        if args:
            d = args[0]
            if isinstance(d, dict):
                for k, v in self.__call_items(d):
                    if type(v) is dict:
                        v = DotMap(v, _dynamic=self._dynamic)
                    if type(v) is list:
                        l = []
                        for i in v:
                            n = i
                            if type(i) is dict:
                                n = DotMap(i, _dynamic=self._dynamic)
                            l.append(n)
                        v = l
                    self._map[k] = v
        if kwargs:
            for k, v in self.__call_items(kwargs):
                if k is not "_dynamic":
                    self._map[k] = v

    def __call_items(self, obj):
        if hasattr(obj, "iteritems") and ismethod(getattr(obj, "iteritems")):
            return obj.iteritems()
        else:
            return obj.items()

    def items(self):
        return self.iteritems()

    def iteritems(self):
        return self.__call_items(self._map)

    def __iter__(self):
        return self._map.__iter__()

    def next(self):
        return self._map.next()

    def __setitem__(self, k, v):
        self._map[k] = v

    def __getitem__(self, k):
        if k not in self._map and self._dynamic and k != "_ipython_canary_method_should_not_exist_":
            # automatically extend to new DotMap
            self[k] = DotMap()
        return self._map[k]

    def __setattr__(self, k, v):
        if k in {"_map", "_dynamic", "_ipython_canary_method_should_not_exist_"}:
            super(DotMap, self).__setattr__(k, v)
        else:
            self[k] = v

    def __getattr__(self, k):
        if k == {"_map", "_dynamic", "_ipython_canary_method_should_not_exist_"}:
            super(DotMap, self).__getattr__(k)
        else:
            return self[k]

    def __delattr__(self, key):
        return self._map.__delitem__(key)

    def __contains__(self, k):
        return self._map.__contains__(k)

    def __str__(self):
        items = []
        for k, v in self.__call_items(self._map):
            # bizarre recursive assignment situation (why someone would do this is beyond me)
            if id(v) == id(self):
                items.append("{0}=DotMap(...)".format(k))
            else:
                items.append("{0}={1}".format(k, repr(v)))
        out = "DotMap({0})".format(", ".join(items))
        return out

    def __repr__(self):
        return str(self)

    def toDict(self):
        d = {}
        for k, v in self.items():
            if type(v) is DotMap:
                # bizarre recursive assignment support
                if id(v) == id(self):
                    v = d
                else:
                    v = v.toDict()
            elif type(v) is list:
                l = []
                for i in v:
                    n = i
                    if type(i) is DotMap:
                        n = i.toDict()
                    l.append(n)
                v = l
            d[k] = v
        return d

    def pprint(self):
        pprint(self.toDict())

    def empty(self):
        return not any(self)

        # proper dict subclassing

    def values(self):
        return self._map.values()

        # ipython support

    def __dir__(self):
        return self.keys()

    @classmethod
    def parseOther(self, other):
        if type(other) is DotMap:
            return other._map
        else:
            return other

    def __cmp__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__cmp__(other)

    def __eq__(self, other):
        other = DotMap.parseOther(other)
        if not isinstance(other, dict):
            return False
        return self._map.__eq__(other)

    def __ge__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__ge__(other)

    def __gt__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__gt__(other)

    def __le__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__le__(other)

    def __lt__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__lt__(other)

    def __ne__(self, other):
        other = DotMap.parseOther(other)
        return self._map.__ne__(other)

    def __delitem__(self, key):
        return self._map.__delitem__(key)

    def __len__(self):
        return self._map.__len__()

    def clear(self):
        self._map.clear()

    def copy(self):
        return DotMap(self.toDict())

    def __copy__(self):
        return self.copy()

    def __deepcopy__(self, memo=None):
        return self.copy()

    def get(self, key, default=None):
        return self._map.get(key, default)

    def has_key(self, key):
        return key in self._map

    def iterkeys(self):
        return self._map.iterkeys()

    def itervalues(self):
        return self._map.itervalues()

    def keys(self):
        return self._map.keys()

    def pop(self, key, default=None):
        return self._map.pop(key, default)

    def popitem(self):
        return self._map.popitem()

    def setdefault(self, key, default=None):
        self._map.setdefault(key, default)

    def update(self, *args, **kwargs):
        if len(args) != 0:
            self._map.update(*args)
        self._map.update(kwargs)

    def viewitems(self):
        return self._map.viewitems()

    def viewkeys(self):
        return self._map.viewkeys()

    def viewvalues(self):
        return self._map.viewvalues()

    @classmethod
    def fromkeys(cls, seq, value=None):
        d = DotMap()
        d._map = OrderedDict.fromkeys(seq, value)
        return d

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)
Example #4
1
class BaseCache(object):
    """
    BaseCache is a class that saves and operates on an OrderedDict. It has a
    certain capacity, stored in the attribute `maxsize`. Whether this
    capacity is reached, can be checked by using the boolean property
    `is_full`. To implement a custom cache, inherit from this class and
    override the methods ``__getitem__`` and ``__setitem__``.
    Call the method `sunpy.database.caching.BaseCache.callback` as soon
    as an item from the cache is removed.
    """

    __metaclass__ = ABCMeta

    def __init__(self, maxsize=float("inf")):
        self.maxsize = maxsize
        self._dict = OrderedDict()

    def get(self, key, default=None):  # pragma: no cover
        """Return the corresponding value to `key` if `key` is in the cache,
        `default` otherwise. This method has no side-effects, multiple calls
        with the same cache and the same passed key must always return the same
        value.

        """
        try:
            return self._dict[key]
        except KeyError:
            return default

    @abstractmethod
    def __getitem__(self, key):
        """abstract method: this method must be overwritten by inheriting
        subclasses. It defines what happens if an item from the cache is
        attempted to be accessed.

        """
        return  # pragma: no cover

    @abstractmethod
    def __setitem__(self, key, value):
        """abstract method: this method must be overwritten by inheriting
        subclasses. It defines what happens if a new value should be assigned
        to the given key. If the given key does already exist in the cache or
        not must be checked by the person who implements this method.
        """

    @abstractproperty
    def to_be_removed(self):
        """The item that will be removed on the next
        :meth:`sunpy.database.caching.BaseCache.remove` call.

        """

    @abstractmethod
    def remove(self):
        """Call this method to manually remove one item from the cache. Which
        item is removed, depends on the implementation of the cache. After the
        item has been removed, the callback method is called.

        """

    def callback(self, key, value):
        """This method should be called (by convention) if an item is removed
        from the cache because it is full. The passed key and value are the
        ones that are removed. By default this method does nothing, but it
        can be customized in a custom cache that inherits from this base class.

        """

    @property
    def is_full(self):
        """True if the number of items in the cache equals :attr:`maxsize`,
        False otherwise.

        """
        return len(self._dict) == self.maxsize

    def __delitem__(self, key):
        self._dict.__delitem__(key)

    def __contains__(self, key):
        return key in self._dict.keys()

    def __len__(self):
        return len(self._dict)

    def __iter__(self):
        for key in self._dict.__iter__():
            yield key

    def __reversed__(self):  # pragma: no cover
        for key in self._dict.__reversed__():
            yield key

    def clear(self):  # pragma: no cover
        return self._dict.clear()

    def keys(self):  # pragma: no cover
        return self._dict.keys()

    def values(self):  # pragma: no cover
        return self._dict.values()

    def items(self):  # pragma: no cover
        return self._dict.items()

    def iterkeys(self):  # pragma: no cover
        return self._dict.iterkeys()

    def itervalues(self):  # pragma: no cover
        for value in self._dict.itervalues():
            yield value

    def iteritems(self):  # pragma: no cover
        for key, value in self._dict.iteritems():
            yield key, value

    def update(self, *args, **kwds):  # pragma: no cover
        self._dict.update(*args, **kwds)

    def pop(self, key, default=MutableMapping._MutableMapping__marker):  # pragma: no cover
        return self._dict.pop(key, default)

    def setdefault(self, key, default=None):  # pragma: no cover
        return self._dict.setdefault(key, default)

    def popitem(self, last=True):  # pragma: no cover
        return self._dict.popitem(last)

    def __reduce__(self):  # pragma: no cover
        return self._dict.__reduce__()

    def copy(self):  # pragma: no cover
        return self._dict.copy()

    def __eq__(self, other):  # pragma: no cover
        return self._dict.__eq__(other)

    def __ne__(self, other):  # pragma: no cover
        return self._dict.__ne__(other)

    def viewkeys(self):  # pragma: no cover
        return self._dict.viewkeys()

    def viewvalues(self):  # pragma: no cover
        return self._dict.viewvalues()

    def viewitems(self):  # pragma: no cover
        return self._dict.viewitems()

    @classmethod
    def fromkeys(cls, iterable, value=None):  # pragma: no cover
        return OrderedDict.fromkeys(iterable, value)

    def __repr__(self):  # pragma: no cover
        return "{0}({1!r})".format(self.__class__.__name__, dict(self._dict))