Example 1
class Entry(OrderedDict):
    """Class representing an individual element of each Catalog.

    For example, a single supernova in the supernova catalog, this object
    handles and manages the addition of data for this `Entry`, using different
    `CatDict` instances (e.g. `Photometry`).

    Notes
    -----
    -   Stubs: a stub is the most minimal entry, containing an entry's 'name'
        and possible aliases.  These instances are used to represent entries
        which are known to exist (e.g. have already been saved) for cross
        referencing and duplicate removal.
        +   The `Entry.get_stub` method returns the 'stub' corresponding to the
            Entry instance.  i.e. it returns a *new object* with only the name
            and aliases copied over.

    Attributes
    ----------
    catalog : `astrocats.catalog.catalog.Catalog` object
        Pointer to the parent catalog object of which this entry is a member.
    filename : str or None
        If this entry is loaded from a file, its (full path and) filename.
    _log : `logging.Logger` object
        Pointer to the logger from the parent catalog.
    _stub : bool
        Whether this instance represents a 'stub' (see above).
    _KEYS : `astrocats.catalog.key.KeyCollection` object
        The associated object which contains the different dictionary keys
        used in this type (e.g. `Supernova`) entry.

    """

    _KEYS = ENTRY

    def __init__(self, catalog=None, name=None, stub=False):
        """Create a new `Entry` object with the given `name`.

        Arguments
        ---------
        catalog : `astrocats.catalog.catalog.Catalog` instance
            The parent catalog object to which this entry belongs.
        name : str
            The name of this entry, e.g. `SN1987A` for a `Supernova` entry.
        stub : bool
            Whether or not this instance represents a 'stub' (see above).

        """
        super(Entry, self).__init__()
        self.catalog = catalog
        self.filename = None
        self.dupe_of = []
        self._stub = stub
        if catalog:
            self._log = catalog.log
        else:
            from astrocats.catalog.catalog import Catalog
            self._log = logging.getLogger()
            self.catalog = Catalog(None, self._log)
        self[self._KEYS.NAME] = name
        return

    def __repr__(self):
        """Return JSON representation of self."""
        jsonstring = dict_to_pretty_string({ENTRY.NAME: self})
        return jsonstring

    def _append_additional_tags(self, quantity, source, cat_dict):
        """Append additional bits of data to an existing quantity.

        Called when a newly added quantity is found to be a duplicate.
        """
        pass

    def _get_save_path(self, bury=False):
        """Return the path that this Entry should be saved to."""
        filename = self.get_filename(self[self._KEYS.NAME])

        # Put objects that don't belong in this catalog in the boneyard
        if bury:
            outdir = self.catalog.get_repo_boneyard()

        # Get normal repository save directory
        else:
            repo_folders = self.catalog.PATHS.get_repo_output_folders()
            # If no repo folders exist, raise an error -- cannot save
            if not len(repo_folders):
                err_str = (
                    "No output data repositories found. Cannot save.\n"
                    "Make sure that repo names are correctly configured "
                    "in the `input/repos.json` file, and either manually or "
                    "automatically (using `astrocats CATALOG git-clone`) "
                    "clone the appropriate data repositories.")
                self.catalog.log.error(err_str)
                raise RuntimeError(err_str)

            outdir = repo_folders[0]

        return outdir, filename

    def _ordered(self, odict):
        """Convert the object into a plain OrderedDict."""
        ndict = OrderedDict()

        if isinstance(odict, (CatDict, Entry)):
            key = odict.sort_func
        else:
            key = None

        nkeys = list(sorted(odict.keys(), key=key))
        for key in nkeys:
            if isinstance(odict[key], OrderedDict):
                odict[key] = self._ordered(odict[key])
            if isinstance(odict[key], list):
                # Recurse into lists that are empty or whose first element
                # is an `OrderedDict`.
                if (not odict[key] or
                        isinstance(odict[key][0], OrderedDict)):
                    nlist = []
                    for item in odict[key]:
                        if isinstance(item, OrderedDict):
                            nlist.append(self._ordered(item))
                        else:
                            nlist.append(item)
                    odict[key] = nlist
            ndict[key] = odict[key]

        return ndict

    def get_hash(self, keys=[]):
        """Return a unique hash associated with the listed keys."""
        if not len(keys):
            keys = list(self.keys())

        string_rep = ''
        oself = self._ordered(deepcopy(self))
        for key in keys:
            string_rep += json.dumps(oself.get(key, ''), sort_keys=True)

        return hashlib.sha512(string_rep.encode()).hexdigest()[:16]

    def _clean_quantity(self, quantity):
        """Clean quantity value before it is added to entry."""
        value = quantity.get(QUANTITY.VALUE, '').strip()
        error = quantity.get(QUANTITY.E_VALUE, '').strip()
        unit = quantity.get(QUANTITY.U_VALUE, '').strip()
        kind = quantity.get(QUANTITY.KIND, '')

        if isinstance(kind, list) and not isinstance(kind, string_types):
            kind = [x.strip() for x in kind]
        else:
            kind = kind.strip()

        if not value:
            return False

        if is_number(value):
            value = '%g' % Decimal(value)
        if error:
            error = '%g' % Decimal(error)

        if value:
            quantity[QUANTITY.VALUE] = value
        if error:
            quantity[QUANTITY.E_VALUE] = error
        if unit:
            quantity[QUANTITY.U_VALUE] = unit
        if kind:
            quantity[QUANTITY.KIND] = kind

        return True

    def __deepcopy__(self, memo):
        """Define how an `Entry` should be deep copied."""
        new_entry = self.__class__(self.catalog)
        for key in self:
            if not key.startswith('__') and key != 'catalog':
                new_entry[key] = deepcopy(self[key])
        return new_entry

    def _load_data_from_json(self,
                             fhand,
                             clean=False,
                             merge=True,
                             pop_schema=True,
                             ignore_keys=[],
                             compare_to_existing=True,
                             gzip=False,
                             filter_on={}):
        """Load `Entry` data from the given JSON file."""
        # FIX: check for overwrite??
        self._log.debug("_load_data_from_json(): {}\n\t{}".format(self.name(),
                                                                  fhand))
        # Store the filename this was loaded from
        self.filename = fhand

        if gzip:
            jfil = gz.open(fhand, 'rb')
        else:
            jfil = codecs.open(fhand, 'r')

        data = json.load(jfil, object_pairs_hook=OrderedDict)
        name = list(data.keys())
        if len(name) != 1:
            err = "json file '{}' has multiple keys: {}".format(fhand,
                                                                list(name))
            self._log.error(err)
            raise ValueError(err)
        name = name[0]
        # Remove the outermost dict level
        data = data[name]
        self._log.debug("Name: {}".format(name))

        # Delete ignored keys
        for key in ignore_keys:
            if key in data:
                del data[key]

        # Convert the OrderedDict data from json into class structure, i.e.
        # `Sources` will be extracted and created from the dict.  Everything
        # that remains afterwards should be okay to just store to this
        # `Entry`.
        self._convert_odict_to_classes(
            data,
            clean=clean,
            merge=merge,
            pop_schema=pop_schema,
            compare_to_existing=compare_to_existing,
            filter_on=filter_on)
        if len(data):
            err_str = ("Remaining entries in `data` after "
                       "`_convert_odict_to_classes`.")
            err_str += "\n{}".format(dict_to_pretty_string(data))
            self._log.error(err_str)
            raise RuntimeError(err_str)

        jfil.close()

        # If the object doesn't have a name yet, but the json does, store it
        self_name = self[ENTRY.NAME]
        if len(self_name) == 0:
            self[ENTRY.NAME] = name
        # Warn if there is a name mismatch
        elif self_name.lower().strip() != name.lower().strip():
            self._log.warning("Object name '{}' does not match name in json:"
                              "'{}'".format(self_name, name))

        self.check()
        return

    def _convert_odict_to_classes(self,
                                  data,
                                  clean=False,
                                  merge=True,
                                  pop_schema=True,
                                  compare_to_existing=True,
                                  filter_on={}):
        """Convert `OrderedDict` into `Entry` or its derivative classes."""
        self._log.debug("_convert_odict_to_classes(): {}".format(self.name()))
        self._log.debug("This should be a temporary fix.  Dont be lazy.")

        # Set up filters. Currently only used for photometry.
        fkeys = list(filter_on.keys())

        # Handle 'name'
        name_key = self._KEYS.NAME
        if name_key in data:
            self[name_key] = data.pop(name_key)

        # Handle 'schema'
        schema_key = self._KEYS.SCHEMA
        if schema_key in data:
            # Schema should be re-added every execution (done elsewhere) so
            # just delete the old entry
            if pop_schema:
                data.pop(schema_key)
            else:
                self[schema_key] = data.pop(schema_key)

        # Cleanup 'internal' repository stuff
        if clean:
            # Add data to `self` in ways accommodating 'internal' formats and
            # leeway.  Removes each added entry from `data` so the remaining
            # stuff can be handled normally
            data = self.clean_internal(data)

        # Handle 'sources'
        # ----------------
        src_key = self._KEYS.SOURCES
        if src_key in data:
            # Remove from `data`
            sources = data.pop(src_key)
            self._log.debug("Found {} '{}' entries".format(
                len(sources), src_key))
            self._log.debug("{}: {}".format(src_key, sources))

            for src in sources:
                self.add_source(allow_alias=True, **src)

        # Handle `photometry`
        # -------------------
        photo_key = self._KEYS.PHOTOMETRY
        if photo_key in data:
            photoms = data.pop(photo_key)
            self._log.debug("Found {} '{}' entries".format(
                len(photoms), photo_key))
            phcount = 0
            for photo in photoms:
                skip = False
                for fkey in fkeys:
                    if fkey in photo and photo[fkey] not in filter_on[fkey]:
                        skip = True
                if skip:
                    continue
                self._add_cat_dict(
                    Photometry,
                    self._KEYS.PHOTOMETRY,
                    compare_to_existing=compare_to_existing,
                    **photo)
                phcount += 1
            self._log.debug("Added {} '{}' entries".format(
                phcount, photo_key))

        # Handle `spectra`
        # ---------------
        spec_key = self._KEYS.SPECTRA
        if spec_key in data:
            # When we are cleaning internal data, we don't always want to
            # require all of the normal spectrum data elements.
            spectra = data.pop(spec_key)
            self._log.debug("Found {} '{}' entries".format(
                len(spectra), spec_key))
            for spec in spectra:
                self._add_cat_dict(
                    Spectrum,
                    self._KEYS.SPECTRA,
                    compare_to_existing=compare_to_existing,
                    **spec)

        # Handle `error`
        # --------------
        err_key = self._KEYS.ERRORS
        if err_key in data:
            errors = data.pop(err_key)
            self._log.debug("Found {} '{}' entries".format(
                len(errors), err_key))
            for err in errors:
                self._add_cat_dict(Error, self._KEYS.ERRORS, **err)

        # Handle `models`
        # ---------------
        model_key = self._KEYS.MODELS
        if model_key in data:
            # When we are cleaning internal data, we don't always want to
            # require all of the normal model data elements.
            model = data.pop(model_key)
            self._log.debug("Found {} '{}' entries".format(
                len(model), model_key))
            for mod in model:
                self._add_cat_dict(
                    Model,
                    self._KEYS.MODELS,
                    compare_to_existing=compare_to_existing,
                    **mod)

        # Handle everything else --- should be `Quantity`s
        # ------------------------------------------------
        if len(data):
            self._log.debug("{} remaining entries, assuming `Quantity`".format(
                len(data)))
            # Iterate over remaining keys
            for key in list(data.keys()):
                vals = data.pop(key)
                # All quantities should be in lists of that quantity
                #    E.g. `aliases` is a list of alias quantities
                if not isinstance(vals, list):
                    vals = [vals]
                self._log.debug("{}: {}".format(key, vals))
                for vv in vals:
                    self._add_cat_dict(
                        Quantity,
                        key,
                        check_for_dupes=merge,
                        compare_to_existing=compare_to_existing,
                        **vv)

        if merge and self.dupe_of:
            self.merge_dupes()

        return

    def _check_cat_dict_source(self, cat_dict_class, key_in_self, **kwargs):
        """Check that a source exists and that a quantity isn't erroneous."""
        # Make sure that a source is given
        source = kwargs.get(cat_dict_class._KEYS.SOURCE, None)
        if source is None:
            raise CatDictError(
                "{}: `source` must be provided!".format(self[self._KEYS.NAME]),
                warn=True)
        # Check that source is a list of integers
        for x in source.split(','):
            if not is_integer(x):
                raise CatDictError(
                    "{}: `source` must be a comma-delimited list of "
                    "integers!".format(self[self._KEYS.NAME]),
                    warn=True)
        # If this source/data is erroneous, skip it
        if self.is_erroneous(key_in_self, source):
            self._log.info("This source is erroneous, skipping")
            return None
        # If this source/data is private, skip it
        if (self.catalog.args is not None and not self.catalog.args.private and
                self.is_private(key_in_self, source)):
            self._log.info("This source is private, skipping")
            return None
        return source

    def _init_cat_dict(self, cat_dict_class, key_in_self, **kwargs):
        """Initialize a CatDict object, checking for errors."""
        # Catch errors associated with crappy, but not unexpected data
        try:
            new_entry = cat_dict_class(self, key=key_in_self, **kwargs)
        except CatDictError as err:
            if err.warn:
                self._log.info("'{}' Not adding '{}': '{}'".format(self[
                    self._KEYS.NAME], key_in_self, str(err)))
            return None
        return new_entry

    def _add_cat_dict(self,
                      cat_dict_class,
                      key_in_self,
                      check_for_dupes=True,
                      compare_to_existing=True,
                      **kwargs):
        """Add a `CatDict` to this `Entry`.

        CatDict only added if initialization succeeds and it
        doesn't already exist within the Entry.
        """
        # Make sure that a source is given, and is valid (not erroneous)
        if cat_dict_class != Error:
            try:
                source = self._check_cat_dict_source(cat_dict_class,
                                                     key_in_self, **kwargs)
            except CatDictError as err:
                if err.warn:
                    self._log.info("'{}' Not adding '{}': '{}'".format(self[
                        self._KEYS.NAME], key_in_self, str(err)))
                return False

            if source is None:
                return False

        # Try to create a new instance of this subclass of `CatDict`
        new_entry = self._init_cat_dict(cat_dict_class, key_in_self, **kwargs)
        if new_entry is None:
            return False

        # Compare this new entry with all previous entries to make sure it
        # is new
        if compare_to_existing and cat_dict_class != Error:
            for item in self.get(key_in_self, []):
                if new_entry.is_duplicate_of(item):
                    item.append_sources_from(new_entry)
                    # Return the entry in case we want to use any additional
                    # tags to augment the old entry
                    return new_entry

        # If this is an alias, add it to the parent catalog's reverse
        # dictionary linking aliases to names for fast lookup.
        if key_in_self == self._KEYS.ALIAS:
            # Check if adding this alias makes us a dupe; if so, mark
            # ourselves as a dupe.
            if (check_for_dupes and 'aliases' in dir(self.catalog) and
                    new_entry[QUANTITY.VALUE] in self.catalog.aliases):
                possible_dupe = self.catalog.aliases[new_entry[QUANTITY.VALUE]]
                if (possible_dupe != self[self._KEYS.NAME] and
                        possible_dupe in self.catalog.entries):
                    self.dupe_of.append(possible_dupe)
            if 'aliases' in dir(self.catalog):
                self.catalog.aliases[new_entry[QUANTITY.VALUE]] = self[
                    self._KEYS.NAME]

        self.setdefault(key_in_self, []).append(new_entry)

        if (key_in_self == self._KEYS.ALIAS and check_for_dupes and
                self.dupe_of):
            self.merge_dupes()

        return True

    @classmethod
    def get_filename(cls, name):
        """Convert from an `Entry` name into an appropriate filename."""
        fname = name.replace('/', '_')
        return fname

    @classmethod
    def init_from_file(cls,
                       catalog,
                       name=None,
                       path=None,
                       clean=False,
                       merge=True,
                       pop_schema=True,
                       ignore_keys=[],
                       compare_to_existing=True,
                       try_gzip=False,
                       filter_on={}):
        """Construct a new `Entry` instance from an input file.

        The input file can be given explicitly by `path`, or a path will
        be constructed appropriately if possible.

        Arguments
        ---------
        catalog : `astrocats.catalog.catalog.Catalog` instance
            The parent catalog object to which this entry belongs.
        name : str or None
            The name of this entry, e.g. `SN1987A` for a `Supernova` entry.
            If no `path` is given, a path is constructed by trying to find
            a file in one of the 'output' repositories with this `name`.
            note: either `name` or `path` must be provided.
        path : str or None
            The absolute path of the input file.
            note: either `name` or `path` must be provided.
        clean : bool
            Whether special sanitization processing should be done on the input
            data.  This is mostly for input files from the 'internal'
            repositories.
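
        Example
        -------
        A hypothetical call, loading an entry named 'SN1987A' from one of
        the configured output repositories:

        >>> entry = Entry.init_from_file(catalog, name='SN1987A')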

        """
        if not catalog:
            from astrocats.catalog.catalog import Catalog
            log = logging.getLogger()
            catalog = Catalog(None, log)

        catalog.log.debug("init_from_file()")
        if name is None and path is None:
            err = ("Either entry `name` or `path` must be specified to load "
                   "entry.")
            catalog.log.error(err)
            raise ValueError(err)

        # If the path is given, use that to load from
        load_path = ''
        if path is not None:
            load_path = path
            name = ''
        # If the name is given, try to find a path for it
        else:
            repo_paths = catalog.PATHS.get_repo_output_folders()
            for rep in repo_paths:
                filename = cls.get_filename(name)
                newpath = os.path.join(rep, filename + '.json')
                if os.path.isfile(newpath):
                    load_path = newpath
                    break

        if not load_path or not os.path.isfile(load_path):
            # FIX: is this warning worthy?
            return None

        # Create a new `Entry` instance
        new_entry = cls(catalog, name)

        # Check if .gz file
        if try_gzip and not load_path.endswith('.gz'):
            try_gzip = False

        # Fill it with data from json file
        new_entry._load_data_from_json(
            load_path,
            clean=clean,
            merge=merge,
            pop_schema=pop_schema,
            ignore_keys=ignore_keys,
            compare_to_existing=compare_to_existing,
            gzip=try_gzip,
            filter_on=filter_on)

        return new_entry

    def add_alias(self, alias, source, clean=True):
        """Add an alias, optionally 'cleaning' the alias string.

        Calls the parent `catalog` method `clean_entry_name` to apply the
        same name-cleaning as is applied to entry names themselves.

        Returns
        -------
        alias : str
            The stored version of the alias (cleaned or not).

        """
        if clean:
            alias = self.catalog.clean_entry_name(alias)
        self.add_quantity(self._KEYS.ALIAS, alias, source)
        return alias

    def add_error(self, value, **kwargs):
        """Add an `Error` instance to this entry."""
        kwargs.update({ERROR.VALUE: value})
        self._add_cat_dict(Error, self._KEYS.ERRORS, **kwargs)
        return

    def add_photometry(self, compare_to_existing=True, **kwargs):
        """Add a `Photometry` instance to this entry."""
        self._add_cat_dict(
            Photometry,
            self._KEYS.PHOTOMETRY,
            compare_to_existing=compare_to_existing,
            **kwargs)
        return

    def merge_dupes(self):
        """Merge two entries that correspond to the same entry."""
        for dupe in self.dupe_of:
            if dupe in self.catalog.entries:
                if self.catalog.entries[dupe]._stub:
                    # merge = False to avoid infinite recursion
                    self.catalog.load_entry_from_name(
                        dupe, delete=True, merge=False)
                self.catalog.copy_entry_to_entry(self.catalog.entries[dupe],
                                                 self)
                del self.catalog.entries[dupe]
        self.dupe_of = []

    def add_quantity(self,
                     quantities,
                     value,
                     source,
                     check_for_dupes=True,
                     compare_to_existing=True,
                     **kwargs):
        """Add an `Quantity` instance to this entry."""
        success = True
        for quantity in listify(quantities):
            kwargs.update({QUANTITY.VALUE: value, QUANTITY.SOURCE: source})
            cat_dict = self._add_cat_dict(
                Quantity,
                quantity,
                compare_to_existing=compare_to_existing,
                check_for_dupes=check_for_dupes,
                **kwargs)
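            # `_add_cat_dict` returns the new `CatDict` itself when it
            # duplicates an existing entry; hand it to subclasses so any
            # extra tags can be merged into the existing entry.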
            if isinstance(cat_dict, CatDict):
                self._append_additional_tags(quantity, source, cat_dict)
                success = False

        return success

    def add_self_source(self):
        """Add a source that refers to the catalog itself.

        For now this points to the Open Supernova Catalog by default.
        """
        return self.add_source(
            bibcode=self.catalog.OSC_BIBCODE,
            name=self.catalog.OSC_NAME,
            url=self.catalog.OSC_URL,
            secondary=True)

    def add_source(self, allow_alias=False, **kwargs):
        """Add a `Source` instance to this entry."""
        if not allow_alias and SOURCE.ALIAS in kwargs:
            err_str = "`{}` passed in kwargs, this shouldn't happen!".format(
                SOURCE.ALIAS)
            self._log.error(err_str)
            raise RuntimeError(err_str)

        # Set alias number to be +1 of current number of sources
        if SOURCE.ALIAS not in kwargs:
            kwargs[SOURCE.ALIAS] = str(self.num_sources() + 1)
        source_obj = self._init_cat_dict(Source, self._KEYS.SOURCES, **kwargs)
        if source_obj is None:
            return None

        for item in self.get(self._KEYS.SOURCES, []):
            if source_obj.is_duplicate_of(item):
                return item[item._KEYS.ALIAS]

        self.setdefault(self._KEYS.SOURCES, []).append(source_obj)
        return source_obj[source_obj._KEYS.ALIAS]

    def add_model(self, allow_alias=False, **kwargs):
        """Add a `Model` instance to this entry."""
        if not allow_alias and MODEL.ALIAS in kwargs:
            err_str = "`{}` passed in kwargs, this shouldn't happen!".format(
                SOURCE.ALIAS)
            self._log.error(err_str)
            raise RuntimeError(err_str)

        # Set alias number to be +1 of current number of models
        if MODEL.ALIAS not in kwargs:
            kwargs[MODEL.ALIAS] = str(self.num_models() + 1)
        model_obj = self._init_cat_dict(Model, self._KEYS.MODELS, **kwargs)
        if model_obj is None:
            return None

        for item in self.get(self._KEYS.MODELS, []):
            if model_obj.is_duplicate_of(item):
                return item[item._KEYS.ALIAS]

        self.setdefault(self._KEYS.MODELS, []).append(model_obj)
        return model_obj[model_obj._KEYS.ALIAS]

    def add_spectrum(self, compare_to_existing=True, **kwargs):
        """Add a `Spectrum` instance to this entry."""
        spec_key = self._KEYS.SPECTRA
        # Make sure that a source is given, and is valid (not erroneous)
        source = self._check_cat_dict_source(Spectrum, spec_key, **kwargs)
        if source is None:
            return None

        # Try to create a new instance of `Spectrum`
        new_spectrum = self._init_cat_dict(Spectrum, spec_key, **kwargs)
        if new_spectrum is None:
            return None

        is_dupe = False
        for item in self.get(spec_key, []):
            # Only the `filename` should be compared for duplicates. If a
            # duplicate is found, the existing spectrum is kept: a new
            # `exclude` array overwrites the old one, and if only the old
            # spectrum has an `exclude` array, its other fields are updated
            # from the new spectrum.
            if new_spectrum.is_duplicate_of(item):
                if SPECTRUM.EXCLUDE in new_spectrum:
                    item[SPECTRUM.EXCLUDE] = new_spectrum[SPECTRUM.EXCLUDE]
                elif SPECTRUM.EXCLUDE in item:
                    item.update(new_spectrum)
                is_dupe = True
                break

        if not is_dupe:
            self.setdefault(spec_key, []).append(new_spectrum)
        return

    def check(self):
        """Check that the entry has the required fields."""
        # Make sure there is a schema key in dict
        if self._KEYS.SCHEMA not in self:
            self[self._KEYS.SCHEMA] = self.catalog.SCHEMA.URL
        # Make sure there is a name key in dict
        if (self._KEYS.NAME not in self or len(self[self._KEYS.NAME]) == 0):
            raise ValueError("Entry name is empty:\n\t{}".format(
                json.dumps(
                    self, indent=2)))
        return

    def clean_internal(self, data=None):
        """Clean input from 'internal', human added data.

        This is used in the 'Entry.init_from_file' method.
        """
        return data

    def extra_aliases(self):
        """Return aliases considered when merging duplicates."""
        return []

    def get_aliases(self, includename=True):
        """Retrieve the aliases of this object as a list of strings.

        Arguments
        ---------
        includename : bool
            Include the 'name' parameter in the list of aliases.
        """
        # Empty list if it doesn't exist
        alias_quanta = self.get(self._KEYS.ALIAS, [])
        aliases = [aq[QUANTITY.VALUE] for aq in alias_quanta]
        if includename and self[self._KEYS.NAME] not in aliases:
            aliases = [self[self._KEYS.NAME]] + aliases
        return aliases

    def get_entry_text(self, fname):
        """Retrieve the raw text from a file."""
        if fname.split('.')[-1] == 'gz':
            with gz.open(fname, 'rt') as f:
                filetext = f.read()
        else:
            with codecs.open(fname, 'r') as f:
                filetext = f.read()
        return filetext

    def get_source_by_alias(self, alias):
        """Given an alias, find the corresponding source in this entry.

        If the given alias doesn't exist (e.g. there are no sources), then a
        `ValueError` is raised.

        Arguments
        ---------
        alias : str
            The str-integer (e.g. '8') of the target source.

        Returns
        -------
        source : `astrocats.catalog.source.Source` object
            The source object corresponding to the passed alias.

        """
        for source in self.get(self._KEYS.SOURCES, []):
            if source[self._KEYS.ALIAS] == alias:
                return source
        raise ValueError("Source '{}': alias '{}' not found!".format(self[
            self._KEYS.NAME], alias))

    def get_stub(self):
        """Get a new `Entry` which contains the 'stub' of this one.

        The 'stub' is only the name and aliases.

        Usage
        -----
        To convert a normal entry into a stub (for example), overwrite the
        entry in place, i.e.
        >>> entries[name] = entries[name].get_stub()

        Returns
        -------
        stub : `astrocats.catalog.entry.Entry` subclass object
            The type of the returned object is this instance's type.

        """
        stub = type(self)(self.catalog, self[self._KEYS.NAME], stub=True)
        if self._KEYS.ALIAS in self:
            stub[self._KEYS.ALIAS] = self[self._KEYS.ALIAS]
        if self._KEYS.DISTINCT_FROM in self:
            stub[self._KEYS.DISTINCT_FROM] = self[self._KEYS.DISTINCT_FROM]
        if self._KEYS.RA in self:
            stub[self._KEYS.RA] = self[self._KEYS.RA]
        if self._KEYS.DEC in self:
            stub[self._KEYS.DEC] = self[self._KEYS.DEC]
        if self._KEYS.DISCOVER_DATE in self:
            stub[self._KEYS.DISCOVER_DATE] = self[self._KEYS.DISCOVER_DATE]
        if self._KEYS.SOURCES in self:
            stub[self._KEYS.SOURCES] = self[self._KEYS.SOURCES]
        return stub

    def is_erroneous(self, field, sources):
        """Check if attribute has been marked as being erroneous."""
        if self._KEYS.ERRORS in self:
            my_errors = self[self._KEYS.ERRORS]
            for alias in sources.split(','):
                source = self.get_source_by_alias(alias)
                bib_err_values = [
                    err[ERROR.VALUE] for err in my_errors
                    if err[ERROR.KIND] == SOURCE.BIBCODE and
                    err[ERROR.EXTRA] == field
                ]
                if (SOURCE.BIBCODE in source and
                        source[SOURCE.BIBCODE] in bib_err_values):
                    return True

                name_err_values = [
                    err[ERROR.VALUE] for err in my_errors
                    if err[ERROR.KIND] == SOURCE.NAME and
                    err[ERROR.EXTRA] == field
                ]
                if (SOURCE.NAME in source and
                        source[SOURCE.NAME] in name_err_values):
                    return True

        return False

    def is_private(self, key, sources):
        """Check if attribute is private."""
        # aliases are always public.
        if key == ENTRY.ALIAS:
            return False
        return all([
            SOURCE.PRIVATE in self.get_source_by_alias(x)
            for x in sources.split(',')
        ])

    def name(self):
        """Return own name."""
        try:
            return self[self._KEYS.NAME]
        except KeyError:
            return None

    def num_sources(self):
        """Return the current number of sources stored in this instance.

        Returns
        -------
        len : int
            The *integer* number of existing sources.
        """
        return len(self.get(self._KEYS.SOURCES, []))

    def num_models(self):
        """Return the current number of models stored in this instance.

        Returns
        -------
        len : int
            The *integer* number of existing models.
        """
        return len(self.get(self._KEYS.MODELS, []))

    def priority_prefixes(self):
        """Return prefixes to given priority when merging duplicate entries."""
        return ()

    def sanitize(self):
        """Sanitize the data (sort it, etc.) before writing it to disk.

        Template method that can be overridden in each catalog's subclassed
        `Entry` object.
        """
        name = self[self._KEYS.NAME]

        aliases = self.get_aliases(includename=False)
        if name not in aliases:
            # Alias the name to the first source; if that is unavailable,
            # add the catalog itself as the source.
            if self._KEYS.SOURCES in self:
                self.add_quantity(self._KEYS.ALIAS, name, '1')
                if self._KEYS.ALIAS not in self:
                    source = self.add_self_source()
                    self.add_quantity(self._KEYS.ALIAS, name, source)
            else:
                source = self.add_self_source()
                self.add_quantity(self._KEYS.ALIAS, name, source)

        if self._KEYS.ALIAS in self:
            self[self._KEYS.ALIAS].sort(
                key=lambda key: alias_priority(name, key[QUANTITY.VALUE]))
        else:
            self._log.error(
                'There should be at least one alias for `{}`.'.format(name))

        if self._KEYS.PHOTOMETRY in self:
            self[self._KEYS.PHOTOMETRY].sort(
                key=lambda x: ((float(x[PHOTOMETRY.TIME]) if
                                isinstance(x[PHOTOMETRY.TIME],
                                           (basestring, float, int))
                                else min([float(y) for y in
                                          x[PHOTOMETRY.TIME]])) if
                               PHOTOMETRY.TIME in x else 0.0,
                               x[PHOTOMETRY.BAND] if PHOTOMETRY.BAND in
                               x else '',
                               float(x[PHOTOMETRY.MAGNITUDE]) if
                               PHOTOMETRY.MAGNITUDE in x else ''))

        if (self._KEYS.SPECTRA in self and any(
                SPECTRUM.TIME in x for x in self[self._KEYS.SPECTRA])):
            self[self._KEYS.SPECTRA].sort(
                key=lambda x: (float(x[SPECTRUM.TIME]) if
                               SPECTRUM.TIME in x else 0.0,
                               x[SPECTRUM.FILENAME] if
                               SPECTRUM.FILENAME in x else '')
            )

        if self._KEYS.SOURCES in self:
            # Remove orphan sources
            source_aliases = [
                x[SOURCE.ALIAS] for x in self[self._KEYS.SOURCES]
            ]
            # Sources with the `PRIVATE` attribute are always retained
            source_list = [
                x[SOURCE.ALIAS] for x in self[self._KEYS.SOURCES]
                if SOURCE.PRIVATE in x
            ]
            for key in self:
                # if self._KEYS.get_key_by_name(key).no_source:
                if (key in [
                        self._KEYS.NAME, self._KEYS.SCHEMA, self._KEYS.SOURCES,
                        self._KEYS.ERRORS
                ]):
                    continue
                for item in self[key]:
                    source_list += item[item._KEYS.SOURCE].split(',')
            new_src_list = sorted(
                list(set(source_aliases).intersection(source_list)))
            new_sources = []
            for source in self[self._KEYS.SOURCES]:
                if source[SOURCE.ALIAS] in new_src_list:
                    new_sources.append(source)
                else:
                    self._log.info('Removing orphaned source from `{}`.'
                                   .format(name))

            if not new_sources:
                del self[self._KEYS.SOURCES]
            else:
                self[self._KEYS.SOURCES] = new_sources

    def save(self, bury=False, final=False):
        """Write entry to JSON file in the proper location.

        Arguments
        ---------
        bury : bool
            If `True`, save the entry to the 'boneyard' repository for
            objects that do not belong in this catalog.

        final : bool
            If this is the 'final' save, perform additional sanitization and
            cleaning operations.

        """
        outdir, filename = self._get_save_path(bury=bury)

        if final:
            self.sanitize()

        # FIX: use 'dump' not 'dumps'
        jsonstring = json.dumps(
            {
                self[self._KEYS.NAME]: self._ordered(self)
            },
            indent='\t' if sys.version_info[0] >= 3 else 4,
            separators=(',', ':'),
            ensure_ascii=False)
        if not os.path.isdir(outdir):
            raise RuntimeError("Output directory '{}' for event '{}' does "
                               "not exist.".format(outdir, self[
                                   self._KEYS.NAME]))
        save_name = os.path.join(outdir, filename + '.json')
        with codecs.open(save_name, 'w', encoding='utf8') as sf:
            sf.write(jsonstring)

        if not os.path.exists(save_name):
            raise RuntimeError("File '{}' was not saved!".format(save_name))

        return save_name

    def set_preferred_name(self):
        """Set a preferred name for the entry."""
        return self[self._KEYS.NAME]

    def sort_func(self, key):
        """Used to sort keys when writing Entry to JSON format.

        Should be supplemented/overridden by inheriting classes.
        """
        if key == self._KEYS.SCHEMA:
            return 'aaa'
        if key == self._KEYS.NAME:
            return 'aab'
        if key == self._KEYS.SOURCES:
            return 'aac'
        if key == self._KEYS.ALIAS:
            return 'aad'
        if key == self._KEYS.MODELS:
            return 'aae'
        if key == self._KEYS.PHOTOMETRY:
            return 'zzy'
        if key == self._KEYS.SPECTRA:
            return 'zzz'
        return key
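
A minimal usage sketch (an illustration, not from the source): `my_catalog`
is a hypothetical, fully configured `Catalog` instance whose output
repositories exist on disk.

    # Create an entry, attach a self-referencing source, alias it, and save.
    entry = Entry(catalog=my_catalog, name='SN1987A')
    source = entry.add_self_source()       # returns the source's alias string
    entry.add_quantity(ENTRY.ALIAS, 'SN1987A', source)
    saved_path = entry.save()              # writes '<repo>/SN1987A.json'
    stub = entry.get_stub()                # new Entry with only name/aliases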
Example 2
    def generate_event_list(self, event_list):
        """Generate a list of events and/or convert events to JSON format."""
        prt = self._printer
        cidict = OrderedDict()
        intro_shown = False

        new_event_list = []
        previous_file = None
        for event in event_list:
            rsource = {SOURCE.NAME: self._DEFAULT_SOURCE}
            use_self_source = None
            new_events = []
            toffset = Decimal('0')
            if ('.' in event and os.path.isfile(event) and
                    not event.endswith('.json')):
                if not intro_shown:
                    prt.message('converter_info')
                    intro_shown = True

                prt.message('converting_to_json', [event])

                with open(event, 'r') as f:
                    ftxt = f.read()

                # Try a couple of table formats from astropy.
                table = None
                try:
                    table = read(ftxt, Reader=Cds, guess=False)
                except Exception:
                    pass
                else:
                    prt.message('convert_cds')
                    flines = [table.colnames] + [
                        list(x) for x in np.array(table).tolist()]
                    for i in range(len(flines)):
                        flines[i] = [str(x) for x in flines[i]]

                try:
                    table = read(ftxt, Reader=Latex, guess=False)
                except Exception:
                    pass
                else:
                    prt.message('convert_latex')
                    flines = [table.colnames] + [
                        list(x) for x in np.array(table).tolist()]

                if table is None:
                    # Count occurrences of each candidate to determine the
                    # delimiter.
                    delims = [' ', '\t', ',', ';', '|', '&']
                    delimnames = [
                        'Space: ` `', 'Tab: `\t`', 'Comma: `,`',
                        'Semi-colon: `;`', 'Bar: `|`', 'Ampersand: `&`']
                    delim = None
                    delimcounts = [ftxt.count(x) for x in delims]
                    maxdelimcount = max(delimcounts)
                    delim = delims[delimcounts.index(maxdelimcount)]
                    # If two delimiter options are close in count, ask user.
                    for i, x in enumerate(delimcounts):
                        if x > 0.5 * maxdelimcount and delims[i] != delim:
                            delim = None
                    if delim is None:
                        odelims = list(np.array(delimnames)[
                            np.array(delimcounts) > 0])
                        delim = delims[prt.prompt(
                            'delim', kind='option', options=odelims) - 1]
                    ad = list(delims)
                    ad.remove(delim)
                    ad = ''.join(ad)

                    fsplit = ftxt.splitlines()
                    fsplit = [
                        x.replace('$', '').replace('\\pm', delim)
                        .replace('±', delim).replace('(', delim + '(')
                        .strip(ad + '()# ').replace('′', "'")
                        for x in fsplit]
                    flines = []
                    for fs in fsplit:
                        flines.append(list(
                            csv.reader([fs], delimiter=delim))[0])

                    flines = [[
                        x.strip(ad + '#$()\\')
                        for x in y] for y in flines]

                    # Find band columns if they exist and insert error columns
                    # if they don't exist.
                    for fi, fl in enumerate(list(flines)):
                        flcopy = list(fl)
                        offset = 0
                        if not any([is_number(x) for x in fl]):
                            for fci, fc in enumerate(fl):
                                if (fc in self._band_names and
                                    (fci == len(fl) - 1 or
                                     fl[fci + 1] not in self._emagstrs)):
                                    flcopy.insert(fci + 1 + offset, 'e mag')
                                    offset += 1
                        flines[fi] = flcopy

                    # Find the most frequent column count. These are probably
                    # the tables we wish to read.
                    flens = [len(x) for x in flines]
                    ncols = Counter(flens).most_common(1)[0][0]

                    newlines = []
                    potential_name = None
                    for fi, fl in enumerate(flines):
                        if (len(fl) and flens[fi] == 1 and
                            fi < len(flines) - 1 and
                                flens[fi + 1] == ncols and not len(newlines)):
                            potential_name = fl[0]
                        if flens[fi] == ncols:
                            if potential_name is not None and any(
                                    [is_number(x) for x in fl]):
                                newlines.append([potential_name] + list(fl))
                            else:
                                newlines.append(list(fl))
                    flines = newlines
                    for fi, fl in enumerate(flines):
                        if len(fl) == ncols and potential_name is not None:
                            if not any([is_number(x) for x in fl]):
                                flines[fi] = ['name'] + list(fl)

                # If none of the rows contain numeric data, the file
                # is likely a list of transient names.
                if (len(flines) and
                    (not any(any([is_number(x) or x == '' for x in y])
                             for y in flines) or
                     len(flines) == 1)):
                    new_events = [
                        it for s in flines for it in s]

                # If last row is numeric, then likely this is a file with
                # transient data.
                elif (len(flines) > 1 and
                        any([is_number(x) for x in flines[-1]])):

                    # Check that each row has the same number of columns.
                    if len(set([len(x) for x in flines])) > 1:
                        print(set([len(x) for x in flines]))
                        raise ValueError(
                            'Number of columns in each row not '
                            'consistent!')

                    if len(cidict) and len(new_event_list):
                        msg = ('is_file_same' if
                               previous_file else 'is_event_same')
                        reps = [previous_file] if previous_file else [''.join(
                            new_event_list[-1].split('.')[:-1])]
                        text = prt.text(msg, reps)
                        is_same = prt.prompt(text, message=False,
                                             kind='bool')
                        if not is_same:
                            cidict = OrderedDict()

                    # If the first row has no numbers it is likely a header.
                    if not len(cidict):
                        self.assign_columns(cidict, flines)

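                    # A key mapped to a list of columns (other than a
                    # 'j'-style date spec) means several columns share that
                    # key; each permutation yields its own photometry point.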
                    perms = 1
                    for key in cidict:
                        if isinstance(cidict[key], list) and not isinstance(
                                cidict[key], string_types):
                            if cidict[key][0] != 'j':
                                perms = len(cidict[key])

                    # Get event name (if single event) or list of names from
                    # table.
                    event_names = []
                    if ENTRY.NAME in cidict:
                        for fi, fl in enumerate(flines):
                            flines[fi][cidict[ENTRY.NAME]] = name_clean(
                                fl[cidict[ENTRY.NAME]])
                        event_names = list(sorted(set([
                            x[cidict[ENTRY.NAME]] for x in flines[
                                self._first_data:]])))
                        new_events = [x + '.json' for x in event_names]
                    else:
                        new_event_name = '.'.join(event.split(
                            '.')[:-1]).split('/')[-1]
                        text = prt.message(
                            'is_event_name', [new_event_name], prt=False)
                        is_name = prt.prompt(text, message=False,
                                             kind='bool', default='y')
                        if not is_name:
                            new_event_name = ''
                            while new_event_name.strip() == '':
                                new_event_name = prt.prompt(
                                    'enter_name', kind='string')
                        event_names.append(new_event_name)
                        new_events = [new_event_name + '.json']

                    # Create a new event, populate the photometry, and dump
                    # to a JSON file in the run directory.
                    entries = OrderedDict([(x, Entry(name=x))
                                           for x in event_names])

                    # Clean up the data a bit now that we know the column
                    # identities.

                    # Strip common prefixes/suffixes from band names
                    if PHOTOMETRY.BAND in cidict:
                        bi = cidict[PHOTOMETRY.BAND]
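                        # Two passes: `d=True` strips a shared prefix and
                        # `d=False` a shared suffix; a character position is
                        # stripped only when every row agrees on it.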
                        for d in [True, False]:
                            if not isinstance(bi, (int, np.integer)):
                                break
                            strip_cols = []
                            lens = [len(x[bi])
                                    for x in flines[self._first_data:]]
                            llen = min(lens)
                            ra = range(llen) if d else range(-1, -llen - 1, -1)
                            for li in ra:
                                letter = None
                                for row in list(flines[self._first_data:]):
                                    if letter is None:
                                        letter = row[bi][li]
                                    elif row[bi][li] != letter:
                                        letter = None
                                        break
                                if letter is not None:
                                    strip_cols.append(li)
                                else:
                                    break
                            if len(strip_cols) == llen:
                                break
                            for ri in range(len(flines[self._first_data:])):
                                flines[self._first_data + ri][bi] = ''.join(
                                    [c for i, c in enumerate(flines[
                                        self._first_data + ri][bi])
                                     if (i if d else i - len(flines[
                                         self._first_data + ri][bi])) not in
                                     strip_cols])

                    if (PHOTOMETRY.TIME in cidict and
                            (not isinstance(cidict[PHOTOMETRY.TIME], list) or
                             len(cidict[PHOTOMETRY.TIME]) <= 2)):
                        bi = cidict[PHOTOMETRY.TIME]

                        if isinstance(bi, list) and not isinstance(
                            bi, string_types) and isinstance(
                                bi[0], string_types) and bi[0] == 'jd':
                            bi = bi[-1]

                        mmtimes = [float(x[bi])
                                   for x in flines[self._first_data:]]
                        mintime, maxtime = min(mmtimes), max(mmtimes)

                        if mintime < 10000:
                            while True:
                                try:
                                    response = prt.prompt(
                                        'small_time_offset', kind='string')
                                    if response is not None:
                                        toffset = Decimal(response)
                                    break
                                except Exception:
                                    pass
                        elif maxtime > 60000 and cidict[
                                PHOTOMETRY.TIME][0] != 'jd':
                            isjd = prt.prompt(
                                'large_time_offset',
                                kind='bool', default='y')
                            if isjd:
                                toffset = Decimal('-2400000.5')

                    for row in flines[self._first_data:]:
                        photodict = {}
                        rname = (row[cidict[ENTRY.NAME]]
                                 if ENTRY.NAME in cidict else event_names[0])
                        for pi in range(perms):
                            sources = set()
                            for key in cidict:
                                if key in self._bool_keys:
                                    rval = row[cidict[key]]

                                    if rval in self._FALSE_VALS:
                                        rval = False
                                    elif rval in self._TRUE_VALS:
                                        rval = True

                                    # Coerce remaining values to booleans,
                                    # falling back to True for anything that
                                    # cannot be interpreted.
                                    if not isinstance(rval, bool):
                                        try:
                                            rval = bool(rval)
                                        except Exception:
                                            pass

                                    if not isinstance(rval, bool):
                                        try:
                                            rval = bool(float(rval))
                                        except Exception:
                                            rval = True

                                    if not rval:
                                        continue
                                    row[cidict[key]] = rval
                                elif key == 'reference':
                                    # A 19-character string is a bibcode; add
                                    # it as a source and record its alias.
                                    if (isinstance(cidict[key],
                                                   string_types) and
                                            len(cidict[key]) == 19):
                                        new_src = entries[rname].add_source(
                                            bibcode=cidict[key])
                                        sources.add(new_src)
                                        row[cidict[key]] = new_src
                                elif key == ENTRY.NAME:
                                    continue
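                                # Composite time columns: 'j' marks separate
                                # calendar-date parts that are joined into an
                                # ISO date, 'jd' a Julian date; both end up
                                # as MJD.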
                                elif (isinstance(key, Key) and
                                        key.type == KEY_TYPES.TIME and
                                        isinstance(cidict[key], list) and not
                                        isinstance(cidict[key],
                                                   string_types)):
                                    tval = np.array(row)[
                                        np.array(cidict[key][1:], dtype=int)]
                                    if cidict[key][0] == 'j':
                                        date = '-'.join([x.zfill(2) for x in
                                                         tval])
                                        date = self._month_rep.sub(
                                            lambda x: self._MONTH_IDS[
                                                x.group()], date)
                                        photodict[key] = str(
                                            astrotime(date, format='isot').mjd)
                                    elif cidict[key][0] == 'jd':
                                        photodict[key] = str(
                                            jd_to_mjd(Decimal(tval[-1])))
                                    continue

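                                # Remaining keys map to either a constant
                                # string or a column index; lists carry one
                                # mapping per band permutation.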
                                val = cidict[key]
                                if (isinstance(val, list) and not
                                        isinstance(val, string_types)):
                                    val = val[pi]
                                    if isinstance(val, string_types):
                                        if val != '':
                                            photodict[key] = val
                                    else:
                                        photodict[key] = row[val]
                                else:
                                    if isinstance(val, string_types):
                                        if val != '':
                                            photodict[key] = val
                                    else:
                                        photodict[key] = row[val]
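                            # Data type 2: count rates converted to
                            # magnitudes using a zero point.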
                            if self._data_type == 2:
                                if self._zp:
                                    photodict[PHOTOMETRY.ZERO_POINT] = self._zp
                                else:
                                    photodict[PHOTOMETRY.ZERO_POINT] = (
                                        row[cidict[PHOTOMETRY.ZERO_POINT][pi]]
                                        if isinstance(cidict[
                                            PHOTOMETRY.ZERO_POINT], list) else
                                        row[cidict[PHOTOMETRY.ZERO_POINT]])
                                zpp = photodict[PHOTOMETRY.ZERO_POINT]
                                cc = (
                                    row[cidict[PHOTOMETRY.COUNT_RATE][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.COUNT_RATE], list) else
                                    row[cidict[PHOTOMETRY.COUNT_RATE]])
                                ecc = (
                                    row[cidict[PHOTOMETRY.E_COUNT_RATE][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.E_COUNT_RATE], list) else
                                    row[cidict[PHOTOMETRY.E_COUNT_RATE]])
                                if '<' in cc:
                                    set_pd_mag_from_counts(
                                        photodict, ec=cc.strip('<'), zp=zpp)
                                else:
                                    set_pd_mag_from_counts(
                                        photodict, c=cc, ec=ecc, zp=zpp)
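                            # Data type 3: flux densities, normalized to µJy
                            # before converting to magnitudes.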
                            elif self._data_type == 3:
                                photodict[
                                    PHOTOMETRY.U_FLUX_DENSITY] = self._ufd
                                if PHOTOMETRY.U_FLUX_DENSITY in cidict:
                                    photodict[PHOTOMETRY.U_FLUX_DENSITY] = (
                                        row[cidict[
                                            PHOTOMETRY.U_FLUX_DENSITY][pi]]
                                        if isinstance(cidict[
                                            PHOTOMETRY.
                                            U_FLUX_DENSITY], list) else
                                        row[cidict[PHOTOMETRY.U_FLUX_DENSITY]])
                                if photodict[
                                        PHOTOMETRY.U_FLUX_DENSITY] == '':
                                    photodict[
                                        PHOTOMETRY.U_FLUX_DENSITY] = 'µJy'
                                fd = (
                                    row[cidict[PHOTOMETRY.FLUX_DENSITY][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.FLUX_DENSITY], list) else
                                    row[cidict[PHOTOMETRY.FLUX_DENSITY]])
                                efd = (
                                    row[cidict[
                                        PHOTOMETRY.E_FLUX_DENSITY][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.E_FLUX_DENSITY], list) else
                                    row[cidict[PHOTOMETRY.E_FLUX_DENSITY]])

                                mult = Decimal('1')
                                ufd = photodict[PHOTOMETRY.U_FLUX_DENSITY]
                                if ufd.lower() in [
                                        'mjy', 'millijy', 'millijansky']:
                                    mult = Decimal('1e3')
                                elif ufd.lower() in ['jy', 'jansky']:
                                    mult = Decimal('1e6')

                                if '<' in fd:
                                    set_pd_mag_from_flux_density(
                                        photodict, efd=str(
                                            Decimal(fd.strip('<')) * mult))
                                else:
                                    set_pd_mag_from_flux_density(
                                        photodict, fd=Decimal(fd) * mult,
                                        efd=Decimal(efd) * mult)
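                            # If the row itself supplied no source, prompt
                            # the user for one (bibcode, author last name, or
                            # MOSFiT itself).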
                            if not sources:
                                if use_self_source is None:
                                    sopts = [
                                        ('Bibcode', 'b'), ('Last name', 'l')]
                                    if self._require_source:
                                        sel_str = 'must_select_source'
                                    else:
                                        sel_str = 'select_source'
                                    text = prt.text(sel_str)
                                    skind = prt.prompt(
                                        text, kind='option',
                                        options=sopts, default='b',
                                        none_string=(
                                            None if self._require_source else
                                            'Neither, tag MOSFiT as source'))
                                    if skind == 'b':
                                        rsource = {}
                                        bibcode = ''

                                        # Re-prompt until the input matches
                                        # the 19-character bibcode format.
                                        while len(bibcode) != 19:
                                            bibcode = prt.prompt(
                                                'bibcode',
                                                kind='string',
                                                allow_blank=False
                                            )
                                            bibcode = bibcode.strip()
                                            if (re.search(
                                                    r'[0-9]{4}..........'
                                                    r'[\.0-9]{4}[A-Za-z]',
                                                    bibcode) is None):
                                                bibcode = ''
                                        rsource[SOURCE.BIBCODE] = bibcode
                                        use_self_source = False
                                    elif skind == 'l':
                                        rsource = {}
                                        last_name = prt.prompt(
                                            'last_name', kind='string'
                                        )
                                        rsource[SOURCE.NAME] = (
                                            last_name.strip().title() +
                                            ' et al., in preparation')
                                        use_self_source = False
                                    elif skind == 'n':
                                        use_self_source = True

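                                # Attach the chosen source to this
                                # photometric point.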
                                photodict[
                                    PHOTOMETRY.SOURCE] = entries[
                                        rname].add_source(**rsource)

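                            # Values prefixed with '<' or '>' are limits:
                            # flag them and strip the comparison characters.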
                            if any([x in photodict.get(
                                    PHOTOMETRY.MAGNITUDE, '')
                                    for x in ['<', '>']]):
                                photodict[PHOTOMETRY.UPPER_LIMIT] = True
                                photodict[
                                    PHOTOMETRY.MAGNITUDE] = photodict[
                                        PHOTOMETRY.MAGNITUDE].strip('<>')

                            if '<' in photodict.get(PHOTOMETRY.COUNT_RATE, ''):
                                photodict[PHOTOMETRY.UPPER_LIMIT] = True
                                photodict[
                                    PHOTOMETRY.COUNT_RATE] = photodict[
                                        PHOTOMETRY.COUNT_RATE].strip('<')
                                if PHOTOMETRY.E_COUNT_RATE in photodict:
                                    del photodict[PHOTOMETRY.E_COUNT_RATE]

                            if '<' in photodict.get(
                                    PHOTOMETRY.FLUX_DENSITY, ''):
                                photodict[PHOTOMETRY.UPPER_LIMIT] = True
                                photodict[
                                    PHOTOMETRY.FLUX_DENSITY] = photodict[
                                        PHOTOMETRY.FLUX_DENSITY].strip('<')
                                if PHOTOMETRY.E_FLUX_DENSITY in photodict:
                                    del photodict[PHOTOMETRY.E_FLUX_DENSITY]

                            # Apply offset time if set.
                            if (PHOTOMETRY.TIME in photodict and
                                    toffset != Decimal('0')):
                                photodict[PHOTOMETRY.TIME] = str(
                                    Decimal(photodict[PHOTOMETRY.TIME]) +
                                    toffset)

                            # Skip entries for which key values are not
                            # expected type.
                            if not all([
                                is_number(photodict.get(x, ''))
                                for x in photodict.keys() if
                                (PHOTOMETRY.get_key_by_name(x).type ==
                                 KEY_TYPES.NUMERIC)]):
                                continue

                            # Skip placeholder values.
                            if float(photodict.get(
                                    PHOTOMETRY.MAGNITUDE, 0.0)) > 50.0:
                                continue

                            # Add system if specified by user.
                            if (self._system is not None and
                                    PHOTOMETRY.SYSTEM not in photodict):
                                photodict[PHOTOMETRY.SYSTEM] = self._system

                            # Remove keys not in the `PHOTOMETRY` class.
                            for key in list(photodict.keys()):
                                if key not in PHOTOMETRY.vals():
                                    del photodict[key]

                            # Add the photometry.
                            entries[rname].add_photometry(**photodict)

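                    # Sanitize each assembled entry and, if an event file
                    # already exists on disk, optionally merge with it
                    # before writing.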
                    merge_with_existing = None
                    for ei, entry in enumerate(entries):
                        entries[entry].sanitize()
                        if os.path.isfile(new_events[ei]):
                            if merge_with_existing is None:
                                merge_with_existing = prt.prompt(
                                    'merge_with_existing', default='y')
                            if merge_with_existing:
                                existing = Entry.init_from_file(
                                    catalog=None,
                                    name=event_names[ei],
                                    path=new_events[ei],
                                    merge=False,
                                    pop_schema=False,
                                    ignore_keys=[ENTRY.MODELS],
                                    compare_to_existing=False)
                                Catalog().copy_entry_to_entry(
                                    existing, entries[entry])

                        oentry = entries[entry]._ordered(entries[entry])
                        # Use a context manager so the output file is closed
                        # promptly after writing.
                        with open(new_events[ei], 'w') as f:
                            entabbed_json_dump(
                                {entry: oentry}, f, separators=(',', ':'))

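                    # Record each converted (event name, output file) pair.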
                    self._converted.extend([
                        [event_names[x], new_events[x]]
                        for x in range(len(event_names))])

                new_event_list.extend(new_events)
                previous_file = event
            else:
                new_event_list.append(event)

        return new_event_list