Beispiel #1
0
    def assign_columns(self, cidict, flines):
        """Assign table columns to photometry keys, prompting when unsure.

        Header rows (rows in which no cell is numeric) are matched against
        ``self._header_keys``. If that yields no magnitude, count-rate or
        flux-density column, header cells that are band names are treated
        as per-band magnitude columns. The user is then prompted (through
        ``self._printer``) for every remaining key in the critical/helpful
        key lists, and for the zero point, flux-density unit and magnitude
        system when the table does not provide them.

        Parameters
        ----------
        cidict : dict
            Mapping from key to column assignment; updated in place. Values
            written here are a column index, a ``[tag, index]`` pair (for
            header aliases declared as tuples), a list of indices/values,
            a list starting with ``'j'`` (columns to be joined), or a
            string entered by the user.
        flines : list of list of str
            Table rows, header row(s) first.

        Notes
        -----
        Sets ``self._first_data`` (index of the first row containing a
        number; left untouched when no such row exists),
        ``self._data_type`` (1 = magnitudes, 2 = counts, 3 = flux
        densities), ``self._zp``, ``self._ufd`` and ``self._system``, and
        may set ``self._use_mc``. Returns ``None``.
        """
        used_cis = OrderedDict()
        akeys = list(self._critical_keys) + list(self._helpful_keys)
        dkeys = list(self._dep_keys)
        prt = self._printer

        for fi, fl in enumerate(flines):
            if not any([is_number(x) for x in fl]):
                # Try to associate column names with common header keys.
                conflict_keys = []
                conflict_cis = []
                for ci, col in enumerate(fl):
                    for key in self._header_keys:
                        if any([(x[0] if isinstance(x, tuple)
                                 else x) == col.lower()
                                for x in self._header_keys[key]]):
                            if key in cidict or ci in used_cis:
                                # There is a conflict, ask user.
                                conflict_keys.append(key)
                                conflict_cis.append(ci)
                            else:
                                ind = [
                                    (x[0] if isinstance(x, tuple) else x)
                                    for x in self._header_keys[key]].index(
                                        col.lower())
                                match = self._header_keys[key][ind]
                                cidict[key] = [match[-1], ci] if isinstance(
                                    match, tuple) else ci
                                used_cis[ci] = key
                            # Only the first matching key is considered for
                            # each header cell.
                            break

                # Drop every automatic assignment that was involved in a
                # conflict so that the user is asked about it below.
                for ck in conflict_keys:
                    if ck in cidict:
                        ci = cidict[ck]
                        # Tuple aliases are stored as ``[tag, ci]`` while
                        # ``used_cis`` is keyed by the bare column index; a
                        # list cannot be used as a dictionary key.
                        if isinstance(ci, list):
                            ci = ci[-1]
                        del(cidict[ck])
                        del(used_cis[ci])
            else:
                # First row containing a number: the data starts here.
                self._first_data = fi
                break

        # Look for columns that are band names if no mag/counts/flux dens
        # column was found.
        if (not any([x in cidict for x in [
            PHOTOMETRY.MAGNITUDE, PHOTOMETRY.COUNT_RATE,
                PHOTOMETRY.FLUX_DENSITY]])):
            # Delete `E_MAGNITUDE` and `BAND` if they exist (we'll need to
            # find them for each column).
            key = PHOTOMETRY.MAGNITUDE
            ekey = PHOTOMETRY.E_MAGNITUDE
            bkey = PHOTOMETRY.BAND
            if ekey in cidict:
                ci = cidict[ekey]
                del(cidict[used_cis[ci]])
                del(used_cis[ci])
            if bkey in cidict:
                ci = cidict[bkey]
                del(cidict[used_cis[ci]])
                del(used_cis[ci])
            for fi, fl in enumerate(flines):
                if not any([is_number(x) for x in fl]):
                    # Try to associate column names with common header keys.
                    for ci, col in enumerate(fl):
                        if ci in used_cis:
                            continue
                        if col in self._band_names:
                            # A band-named column holds magnitudes; the
                            # band entry stores the name, not an index.
                            cidict.setdefault(key, []).append(ci)
                            used_cis[ci] = key
                            cidict.setdefault(bkey, []).append(col)
                        elif col in self._emagstrs:
                            cidict.setdefault(ekey, []).append(ci)
                            used_cis[ci] = ekey

        # See which keys we collected. If we are missing any critical keys, ask
        # the user which column they are.

        # First ask the user if this data is in magnitudes or in counts.
        # The type is inferred when exactly one of the three value columns
        # was found; otherwise the user chooses.
        self._data_type = 1
        if (PHOTOMETRY.MAGNITUDE in cidict and
                PHOTOMETRY.COUNT_RATE not in cidict and
                PHOTOMETRY.FLUX_DENSITY not in cidict):
            self._data_type = 1
        elif (PHOTOMETRY.MAGNITUDE not in cidict and
              PHOTOMETRY.COUNT_RATE in cidict and
              PHOTOMETRY.FLUX_DENSITY not in cidict):
            self._data_type = 2
        elif (PHOTOMETRY.MAGNITUDE not in cidict and
              PHOTOMETRY.COUNT_RATE not in cidict and
              PHOTOMETRY.FLUX_DENSITY in cidict):
            self._data_type = 3
        else:
            self._data_type = prt.prompt(
                'counts_mags_fds', kind='option',
                options=['Magnitudes', 'Counts (per second)',
                         'Flux Densities (Jansky)'],
                none_string=None)

        # Remove the keys that do not apply to the chosen data type so the
        # user is not asked about them.
        if self._data_type in [1, 3]:
            akeys.remove(PHOTOMETRY.COUNT_RATE)
            akeys.remove(PHOTOMETRY.E_COUNT_RATE)
            akeys.remove(PHOTOMETRY.ZERO_POINT)
            if (PHOTOMETRY.MAGNITUDE in akeys and
                    PHOTOMETRY.E_MAGNITUDE in akeys):
                # Ask for the magnitude error right after the magnitude.
                akeys.remove(PHOTOMETRY.E_MAGNITUDE)
                akeys.insert(
                    akeys.index(PHOTOMETRY.MAGNITUDE) + 1,
                    PHOTOMETRY.E_MAGNITUDE)
            if (PHOTOMETRY.E_LOWER_MAGNITUDE in cidict and
                    PHOTOMETRY.E_UPPER_MAGNITUDE in cidict):
                akeys.remove(PHOTOMETRY.E_MAGNITUDE)
            dkeys.remove(PHOTOMETRY.E_COUNT_RATE)
        if self._data_type in [2, 3]:
            akeys.remove(PHOTOMETRY.MAGNITUDE)
            # For data type 3 the error key may already have been removed
            # above; a second `remove` would raise ValueError.
            if PHOTOMETRY.E_MAGNITUDE in akeys:
                akeys.remove(PHOTOMETRY.E_MAGNITUDE)
            dkeys.remove(PHOTOMETRY.E_MAGNITUDE)
        if self._data_type in [1, 2]:
            akeys.remove(PHOTOMETRY.FLUX_DENSITY)
            akeys.remove(PHOTOMETRY.E_FLUX_DENSITY)
            # NOTE(review): this check runs only when flux densities are
            # NOT the data type, right after the key was removed; it may
            # have been intended for data type 3 -- confirm. Guarded so it
            # cannot raise ValueError on an already-removed key.
            if (PHOTOMETRY.E_LOWER_FLUX_DENSITY in cidict and
                    PHOTOMETRY.E_UPPER_FLUX_DENSITY in cidict and
                    PHOTOMETRY.E_FLUX_DENSITY in akeys):
                akeys.remove(PHOTOMETRY.E_FLUX_DENSITY)
            dkeys.remove(PHOTOMETRY.E_FLUX_DENSITY)
            dkeys.remove(PHOTOMETRY.U_FLUX_DENSITY)

        # Transpose the data rows into columns and build a short preview
        # string (first five cells) for each column to show in prompts.
        columns = np.array(flines[self._first_data:]).T.tolist()
        colstrs = np.array([
            ', '.join(x[:5]) + ', ...' for x in columns])
        # Column indices that have not been assigned to any key yet.
        colinds = np.setdiff1d(np.arange(
            len(colstrs)), list([x[-1] if (
                isinstance(x, list) and not isinstance(
                    x, string_types)) else x for x in cidict.values()]))
        ignore = prt.message('ignore_column', prt=False)
        specify = prt.message('specify_column', prt=False)
        # NOTE(review): from the way results are used below, `prt.prompt`
        # with kind='option' appears to return a 1-based option index, the
        # short code of a tagged option ('j', 'd'), or None for the "none"
        # choice -- confirm against the printer implementation.
        for key in akeys:
            selected_cols = [
                y for y in [a for b in [
                    listify(x) for x in list(cidict.values())] for a in b]
                if isinstance(y, (int, np.integer))]
            if key in cidict:
                continue
            if key in dkeys and self._use_mc:
                continue
            # Offer only the unassigned columns whose content is compatible
            # with the type of the key.
            if key.type == KEY_TYPES.NUMERIC:
                lcolinds = [x for x in colinds
                            if any(is_number(y) for y in columns[x]) and
                            x not in selected_cols]
            elif key.type == KEY_TYPES.TIME:
                lcolinds = [x for x in colinds
                            if any(is_date(y) or is_number(y)
                                   for y in columns[x]) and
                            x not in selected_cols]
            elif key.type == KEY_TYPES.STRING:
                lcolinds = [x for x in colinds
                            if any(not is_number(y) for y in columns[x]) and
                            x not in selected_cols]
            else:
                lcolinds = [x for x in colinds if x not in selected_cols]
            select = False
            selects = []
            while select is False:
                mc = 1
                if key in self._mc_keys:
                    # Ask whether each row holds one or several values.
                    pkey = self._inflect.plural(key)
                    text = prt.message(
                        'one_per_line', [key, pkey, pkey],
                        prt=False)
                    mc = prt.prompt(
                        text, kind='option', message=False,
                        none_string=None,
                        options=[
                            'One `{}` per row'.format(key),
                            'Multiple `{}` per row'.format(pkey)])
                if mc == 1:
                    text = prt.message(
                        'no_matching_column', [key], prt=False)
                    ns = (
                        ignore if key in (
                            self._optional_keys + self._helpful_keys) else
                        specify if key in self._specify_keys
                        else None)
                    if len(colstrs[lcolinds]):
                        select = prt.prompt(
                            text, message=False,
                            kind='option', none_string=ns,
                            default=('j' if ns is None and
                                     len(colstrs[lcolinds]) > 1
                                     else None if ns is None else 'n'),
                            options=colstrs[lcolinds].tolist() + (
                                [('Multiple columns need to be joined.', 'j')]
                                if len(colstrs[lcolinds]) > 1 else []))
                    else:
                        select = None
                    if select == 'j':
                        # Collect the columns to join; `selects` is marked
                        # with a leading 'j'.
                        select = None
                        jsel = None
                        selects.append('j')
                        while jsel != 'd' and len(lcolinds):
                            jsel = prt.prompt(
                                'join_which_columns', default='d',
                                kind='option', none_string=None,
                                options=colstrs[lcolinds].tolist() + [
                                    ('All columns to be joined '
                                     'have been selected.', 'd')
                                ])
                            if jsel != 'd':
                                selects.append(lcolinds[jsel - 1])
                                lcolinds = np.delete(lcolinds, jsel - 1)
                else:
                    # Several values per row: collect one column per value,
                    # each followed by its dependent keys.
                    self._use_mc = True
                    select = False
                    while select is not None:
                        text = prt.message(
                            'select_mc_column', [key], prt=False)
                        select = prt.prompt(
                            text, message=False,
                            kind='option', default='n',
                            none_string='No more `{}` columns.'.format(key),
                            options=colstrs[lcolinds].tolist())
                        if select is not None and select is not False:
                            selects.append(lcolinds[select - 1])
                            lcolinds = np.delete(lcolinds, select - 1)
                        else:
                            break
                        for dk in dkeys:
                            dksel = None
                            while dksel is None:
                                text = prt.message(
                                    'select_dep_column', [dk, key], prt=False)
                                sk = dk in self._specify_keys
                                if not sk:
                                    dksel = prt.prompt(
                                        text, message=False,
                                        kind='option', none_string=None,
                                        options=colstrs[lcolinds].tolist())
                                    if dksel is not None:
                                        selects.append(lcolinds[dksel - 1])
                                        lcolinds = np.delete(
                                            lcolinds, dksel - 1)
                                else:
                                    spectext = prt.message(
                                        'specify_mc_value', [dk, key],
                                        prt=False)
                                    val = ''
                                    # Compare by value: `is ''` tests
                                    # object identity, which is not
                                    # guaranteed for equal strings.
                                    while val.strip() == '':
                                        val = prt.prompt(
                                            spectext, message=False,
                                            kind='string')
                                    selects.append(val)
                                    break

            if select is not None:
                # A single column was chosen for this key.
                iselect = int(select)
                cidict[key] = lcolinds[iselect - 1]
                colinds = np.delete(colinds, np.argwhere(
                    colinds == lcolinds[iselect - 1]))
            elif len(selects):
                if selects[0] == 'j':
                    cidict[key] = selects
                else:
                    # `selects` interleaves the key with its dependent
                    # keys; de-interleave with a stride of len(allk).
                    kdkeys = [key] + dkeys
                    allk = list(OrderedDict.fromkeys(kdkeys).keys())
                    for ki, k in enumerate(allk):
                        cidict[k] = [
                            colinds[s - 1] if isinstance(s, (
                                int, np.integer)) else s
                            for s in selects[ki::len(allk)]]
                    for s in selects:
                        if not isinstance(s, (int, np.integer)):
                            continue
                        colinds = np.delete(colinds, np.argwhere(
                            colinds == s - 1))
            elif key in self._specify_keys:
                # No column chosen: let the user type a single value.
                msg = ('specify_value_blank' if key in self._helpful_keys else
                       'specify_value')
                text = prt.message(msg, [key], prt=False)
                cidict[key] = prt.prompt(
                    text, message=False, kind='string', allow_blank=(
                        key in self._helpful_keys))

        # Counts without a zero-point column: ask until a number is given.
        self._zp = ''
        if self._data_type == 2 and PHOTOMETRY.ZERO_POINT not in cidict:
            while not is_number(self._zp):
                self._zp = prt.prompt('zeropoint', kind='string')

        # Flux densities without a unit column: ask until a recognised
        # unit (or a blank answer) is given.
        self._ufd = None
        if self._data_type == 3 and PHOTOMETRY.U_FLUX_DENSITY not in cidict:
            while ((self._ufd.lower() if self._ufd is not None else None)
                   not in ['µjy', 'mjy', 'jy', 'microjy', 'millijy', 'jy',
                           'microjansky', 'millijansky', 'jansky', '']):
                self._ufd = prt.prompt('u_flux_density', kind='string')

        # Magnitudes without a system column: optionally pick one system
        # for the whole table.
        self._system = None
        if self._data_type == 1 and PHOTOMETRY.SYSTEM not in cidict:
            systems = ['AB', 'Vega']
            self._system = prt.prompt(
                'system', kind='option', options=systems,
                none_string='Use default for all bands.',
                default='n')
            if self._system is not None:
                self._system = systems[int(self._system) - 1]

        if (PHOTOMETRY.INSTRUMENT not in cidict and
                PHOTOMETRY.TELESCOPE not in cidict):
            prt.message('instrument_recommended', warning=True)
Beispiel #2
0
    def adjust_fixed_parameters(self, variance_for_each=None, output=None):
        """Create free parameters that depend on loaded data.

        Rebuilds ``self._call_stack``. When ``'band'`` is listed in
        `variance_for_each`, the ``'variance'`` task is replaced by one
        copy per band (named ``variance-band-<band>``), skipping bands
        whose average wavelength is fractionally too close to the last
        accepted one; a module is loaded for every new task. Afterwards,
        parameter tasks that have an input with a non-``None`` entry in
        `output` are marked as fixed/derived.

        Parameters
        ----------
        variance_for_each : list or scalar, optional
            Passed through ``listify``. If it contains ``'band'``, the item
            following it is used as the minimum fractional wavelength
            difference when it is numeric; otherwise
            ``self.MIN_WAVE_FRAC_DIFF`` is used. Defaults to an empty list.
        output : dict, optional
            Data produced by earlier processing. ``'all_band_indices'`` is
            read from it, and task input names are looked up in it.
            Defaults to an empty dict.
        """
        # `None` sentinels replace the former mutable defaults ([] and {}).
        if variance_for_each is None:
            variance_for_each = []
        if output is None:
            output = {}

        all_band_indices = output.get('all_band_indices', [])
        unique_band_indices = sorted(set(all_band_indices))
        # A negative band index means an observation without a band-specific
        # variance, so the general variance task must be kept.
        # NOTE(review): meaning of negative indices inferred from the
        # variable name -- confirm.
        needs_general_variance = any(np.array(all_band_indices) < 0)

        # Splitting by band needs the photometry module. Without it the
        # variance task is kept as it is (the former code raised NameError
        # on the unbound `band_pairs` in that case).
        has_photometry = 'photometry' in self._call_stack

        new_call_stack = OrderedDict()
        for task in self._call_stack:
            cur_task = self._call_stack[task]
            vfe = listify(variance_for_each)
            if task == 'variance' and 'band' in vfe and has_photometry:
                vfi = vfe.index('band') + 1
                mwfd = float(vfe[vfi]) if (vfi < len(vfe) and is_number(
                    vfe[vfi])) else self.MIN_WAVE_FRAC_DIFF
                photometry = self._modules['photometry']
                awaves = photometry.average_wavelengths(unique_band_indices)
                abands = photometry.bands(unique_band_indices)
                # Walk the bands in order of increasing wavelength.
                band_pairs = sorted(zip(awaves, abands))
                owav = 0.0
                variance_bands = []
                for (awav, band) in band_pairs:
                    wave_frac_diff = abs(awav - owav) / (awav + owav)
                    if wave_frac_diff < mwfd:
                        continue
                    new_task_name = '-'.join([task, 'band', band])
                    if new_task_name in self._call_stack:
                        continue
                    new_task = deepcopy(cur_task)
                    if 'latex' in new_task:
                        new_task['latex'] += '_{\\rm ' + band + '}'
                    # Register the task before loading its module, which
                    # receives the stack being built.
                    new_call_stack[new_task_name] = new_task
                    self._modules[new_task_name] = self._load_task_module(
                        new_task_name, call_stack=new_call_stack)
                    owav = awav
                    variance_bands.append([awav, band])
                if needs_general_variance:
                    new_call_stack[task] = deepcopy(cur_task)
                if self._pool.is_master():
                    self._printer.message(
                        'anchoring_variances',
                        [', '.join([x[1] for x in variance_bands])],
                        wrapped=True)
                photometry.set_variance_bands(variance_bands)
            else:
                new_call_stack[task] = deepcopy(cur_task)
            # Fixed any variables to be fixed if any conditional inputs are
            # fixed by the data.
            # if any([listify(x)[-1] == 'conditional'
            #         for x in cur_task.get('inputs', [])]):
        self._call_stack = new_call_stack

        for task in reversed(self._call_stack):
            cur_task = self._call_stack[task]
            for inp in cur_task.get('inputs', []):
                other = listify(inp)[0]
                if (cur_task['kind'] == 'parameter'
                        and output.get(other, None) is not None):
                    # The input is supplied by the data: fix this parameter
                    # unless the input module is fixed for a reason other
                    # than a user request.
                    if (not self._modules[other]._fixed
                            or self._modules[other]._fixed_by_user):
                        self._modules[task]._fixed = True
                    self._modules[task]._derived_keys = list(
                        set(self._modules[task]._derived_keys + [task]))
Beispiel #3
0
    def generate_event_list(self, event_list):
        """Generate a list of events and/or convert events to JSON format."""
        prt = self._printer
        cidict = OrderedDict()
        intro_shown = False

        new_event_list = []
        previous_file = None
        for event in event_list:
            rsource = {SOURCE.NAME: self._DEFAULT_SOURCE}
            use_self_source = None
            new_events = []
            toffset = Decimal('0')
            if ('.' in event and os.path.isfile(event) and
                    not event.endswith('.json')):
                if not intro_shown:
                    prt.message('converter_info')
                    intro_shown = True

                prt.message('converting_to_json', [event])

                with open(event, 'r') as f:
                    ftxt = f.read()

                # Try a couple of table formats from astropy.
                table = None
                try:
                    table = read(ftxt, Reader=Cds, guess=False)
                except Exception:
                    pass
                else:
                    prt.message('convert_cds')
                    flines = [table.colnames] + [
                        list(x) for x in np.array(table).tolist()]
                    for i in range(len(flines)):
                        flines[i] = [str(x) for x in flines[i]]

                try:
                    table = read(ftxt, Reader=Latex, guess=False)
                except Exception:
                    pass
                else:
                    prt.message('convert_latex')
                    flines = [table.colnames] + [
                        list(x) for x in np.array(table).tolist()]

                if table is None:
                    # Count to try and determine delimiter.
                    delims = [' ', '\t', ',', ';', '|', '&']
                    delimnames = [
                        'Space: ` `', 'Tab: `\t`', 'Comma: `,`',
                        'Semi-colon: `;`', 'Bar: `|`', 'Ampersand: `&`']
                    delim = None
                    delimcounts = [ftxt.count(x) for x in delims]
                    maxdelimcount = max(delimcounts)
                    delim = delims[delimcounts.index(maxdelimcount)]
                    # If two delimiter options are close in count, ask user.
                    for i, x in enumerate(delimcounts):
                        if x > 0.5 * maxdelimcount and delims[i] != delim:
                            delim = None
                    if delim is None:
                        odelims = list(np.array(delimnames)[
                            np.array(delimcounts) > 0])
                        delim = delims[prt.prompt(
                            'delim', kind='option', options=odelims) - 1]
                    ad = list(delims)
                    ad.remove(delim)
                    ad = ''.join(ad)

                    fsplit = ftxt.splitlines()
                    fsplit = [
                        x.replace('$', '').replace('\\pm', delim)
                        .replace('±', delim).replace('(', delim + '(')
                        .strip(ad + '()# ').replace('′', "'")
                        for x in fsplit]
                    flines = []
                    for fs in fsplit:
                        flines.append(list(
                            csv.reader([fs], delimiter=delim))[0])

                    flines = [[
                        x.strip(ad + '#$()\\')
                        for x in y] for y in flines]

                    # Find band columns if they exist and insert error columns
                    # if they don't exist.
                    for fi, fl in enumerate(list(flines)):
                        flcopy = list(fl)
                        offset = 0
                        if not any([is_number(x) for x in fl]):
                            for fci, fc in enumerate(fl):
                                if (fc in self._band_names and
                                    (fci == len(fl) - 1 or
                                     fl[fci + 1] not in self._emagstrs)):
                                    flcopy.insert(fci + 1 + offset, 'e mag')
                                    offset += 1
                        flines[fi] = flcopy

                    # Find the most frequent column count. These are probably
                    # the tables we wish to read.
                    flens = [len(x) for x in flines]
                    ncols = Counter(flens).most_common(1)[0][0]

                    newlines = []
                    potential_name = None
                    for fi, fl in enumerate(flines):
                        if (len(fl) and flens[fi] == 1 and
                            fi < len(flines) - 1 and
                                flens[fi + 1] == ncols and not len(newlines)):
                            potential_name = fl[0]
                        if flens[fi] == ncols:
                            if potential_name is not None and any(
                                    [is_number(x) for x in fl]):
                                newlines.append([potential_name] + list(fl))
                            else:
                                newlines.append(list(fl))
                    flines = newlines
                    for fi, fl in enumerate(flines):
                        if len(fl) == ncols and potential_name is not None:
                            if not any([is_number(x) for x in fl]):
                                flines[fi] = ['name'] + list(fl)

                # If none of the rows contain numeric data, the file
                # is likely a list of transient names.
                if (len(flines) and
                    (not any(any([is_number(x) or x == '' for x in y])
                             for y in flines) or
                     len(flines) == 1)):
                    new_events = [
                        it for s in flines for it in s]

                # If last row is numeric, then likely this is a file with
                # transient data.
                elif (len(flines) > 1 and
                        any([is_number(x) for x in flines[-1]])):

                    # Check that each row has the same number of columns.
                    if len(set([len(x) for x in flines])) > 1:
                        print(set([len(x) for x in flines]))
                        raise ValueError(
                            'Number of columns in each row not '
                            'consistent!')

                    if len(cidict) and len(new_event_list):
                        msg = ('is_file_same' if
                               previous_file else 'is_event_same')
                        reps = [previous_file] if previous_file else [''.join(
                            new_event_list[-1].split('.')[:-1])]
                        text = prt.text(msg, reps)
                        is_same = prt.prompt(text, message=False,
                                             kind='bool')
                        if not is_same:
                            cidict = OrderedDict()

                    # If the first row has no numbers it is likely a header.
                    if not len(cidict):
                        self.assign_columns(cidict, flines)

                    perms = 1
                    for key in cidict:
                        if isinstance(cidict[key], list) and not isinstance(
                                cidict[key], string_types):
                            if cidict[key][0] != 'j':
                                perms = len(cidict[key])

                    # Get event name (if single event) or list of names from
                    # table.
                    event_names = []
                    if ENTRY.NAME in cidict:
                        for fi, fl in enumerate(flines):
                            flines[fi][cidict[ENTRY.NAME]] = name_clean(
                                fl[cidict[ENTRY.NAME]])
                        event_names = list(sorted(set([
                            x[cidict[ENTRY.NAME]] for x in flines[
                                self._first_data:]])))
                        new_events = [x + '.json' for x in event_names]
                    else:
                        new_event_name = '.'.join(event.split(
                            '.')[:-1]).split('/')[-1]
                        text = prt.message(
                            'is_event_name', [new_event_name], prt=False)
                        is_name = prt.prompt(text, message=False,
                                             kind='bool', default='y')
                        if not is_name:
                            new_event_name = ''
                            while new_event_name.strip() == '':
                                new_event_name = prt.prompt(
                                    'enter_name', kind='string')
                        event_names.append(new_event_name)
                        new_events = [new_event_name + '.json']

                    # Create a new event, populate the photometry, and dump
                    # to a JSON file in the run directory.
                    entries = OrderedDict([(x, Entry(name=x))
                                           for x in event_names])

                    # Clean up the data a bit now that we know the column
                    # identities.

                    # Strip common prefixes/suffixes from band names
                    if PHOTOMETRY.BAND in cidict:
                        bi = cidict[PHOTOMETRY.BAND]
                        for d in [True, False]:
                            if not isinstance(bi, (int, np.integer)):
                                break
                            strip_cols = []
                            lens = [len(x[bi])
                                    for x in flines[self._first_data:]]
                            llen = min(lens)
                            ra = range(llen) if d else range(-1, -llen - 1, -1)
                            for li in ra:
                                letter = None
                                for row in list(flines[self._first_data:]):
                                    if letter is None:
                                        letter = row[bi][li]
                                    elif row[bi][li] != letter:
                                        letter = None
                                        break
                                if letter is not None:
                                    strip_cols.append(li)
                                else:
                                    break
                            if len(strip_cols) == llen:
                                break
                            for ri in range(len(flines[self._first_data:])):
                                flines[self._first_data + ri][bi] = ''.join(
                                    [c for i, c in enumerate(flines[
                                        self._first_data + ri][bi])
                                     if (i if d else i - len(flines[
                                         self._first_data + ri][bi])) not in
                                     strip_cols])

                    if (PHOTOMETRY.TIME in cidict and
                            (not isinstance(cidict[PHOTOMETRY.TIME], list) or
                             len(cidict[PHOTOMETRY.TIME]) <= 2)):
                        bi = cidict[PHOTOMETRY.TIME]

                        if isinstance(bi, list) and not isinstance(
                            bi, string_types) and isinstance(
                                bi[0], string_types) and bi[0] == 'jd':
                            bi = bi[-1]

                        mmtimes = [float(x[bi])
                                   for x in flines[self._first_data:]]
                        mintime, maxtime = min(mmtimes), max(mmtimes)

                        if mintime < 10000:
                            while True:
                                try:
                                    response = prt.prompt(
                                        'small_time_offset', kind='string')
                                    if response is not None:
                                        toffset = Decimal(response)
                                    break
                                except Exception:
                                    pass
                        elif maxtime > 60000 and cidict[
                                PHOTOMETRY.TIME][0] != 'jd':
                            isjd = prt.prompt(
                                'large_time_offset',
                                kind='bool', default='y')
                            if isjd:
                                toffset = Decimal('-2400000.5')

                    for row in flines[self._first_data:]:
                        photodict = {}
                        rname = (row[cidict[ENTRY.NAME]]
                                 if ENTRY.NAME in cidict else event_names[0])
                        for pi in range(perms):
                            sources = set()
                            for key in cidict:
                                if key in self._bool_keys:
                                    rval = row[cidict[key]]

                                    if rval in self._FALSE_VALS:
                                        rval = False
                                    elif rval in self._TRUE_VALS:
                                        rval = True

                                    if type(rval) != 'bool':
                                        try:
                                            rval = bool(rval)
                                        except Exception:
                                            pass

                                    if type(rval) != 'bool':
                                        try:
                                            rval = bool(float(rval))
                                        except Exception:
                                            rval = True

                                    if not rval:
                                        continue
                                    row[cidict[key]] = rval
                                elif key == 'reference':
                                    if (isinstance(cidict[key],
                                                   string_types) and
                                            len(cidict[key]) == 19):
                                        new_src = entries[rname].add_source(
                                            bibcode=cidict[key])
                                        sources.update(new_src)
                                        row[
                                            cidict[key]] = new_src
                                elif key == ENTRY.NAME:
                                    continue
                                elif (isinstance(key, Key) and
                                        key.type == KEY_TYPES.TIME and
                                        isinstance(cidict[key], list) and not
                                        isinstance(cidict[key],
                                                   string_types)):
                                    tval = np.array(row)[np.array(cidict[key][
                                        1:], dtype=int)]
                                    if cidict[key][0] == 'j':
                                        date = '-'.join([x.zfill(2) for x in
                                                         tval])
                                        date = self._month_rep.sub(
                                            lambda x: self._MONTH_IDS[
                                                x.group()], date)
                                        photodict[key] = str(
                                            astrotime(date, format='isot').mjd)
                                    elif cidict[key][0] == 'jd':
                                        photodict[key] = str(
                                            jd_to_mjd(Decimal(tval[-1])))
                                    continue

                                val = cidict[key]
                                if (isinstance(val, list) and not
                                        isinstance(val, string_types)):
                                    val = val[pi]
                                    if isinstance(val, string_types):
                                        if val != '':
                                            photodict[key] = val
                                    else:
                                        photodict[key] = row[val]
                                else:
                                    if isinstance(val, string_types):
                                        if val != '':
                                            photodict[key] = val
                                    else:
                                        photodict[key] = row[val]
                            if self._data_type == 2:
                                if self._zp:
                                    photodict[PHOTOMETRY.ZERO_POINT] = self._zp
                                else:
                                    photodict[PHOTOMETRY.ZERO_POINT] = (
                                        row[cidict[PHOTOMETRY.ZERO_POINT][pi]]
                                        if isinstance(cidict[
                                            PHOTOMETRY.ZERO_POINT], list) else
                                        row[cidict[PHOTOMETRY.ZERO_POINT]])
                                zpp = photodict[PHOTOMETRY.ZERO_POINT]
                                cc = (
                                    row[cidict[PHOTOMETRY.COUNT_RATE][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.COUNT_RATE], list) else
                                    row[cidict[PHOTOMETRY.COUNT_RATE]])
                                ecc = (
                                    row[cidict[PHOTOMETRY.E_COUNT_RATE][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.E_COUNT_RATE], list) else
                                    row[cidict[PHOTOMETRY.E_COUNT_RATE]])
                                if '<' in cc:
                                    set_pd_mag_from_counts(
                                        photodict, ec=cc.strip('<'), zp=zpp)
                                else:
                                    set_pd_mag_from_counts(
                                        photodict, c=cc, ec=ecc, zp=zpp)
                            elif self._data_type == 3:
                                photodict[
                                    PHOTOMETRY.U_FLUX_DENSITY] = self._ufd
                                if PHOTOMETRY.U_FLUX_DENSITY in cidict:
                                    photodict[PHOTOMETRY.U_FLUX_DENSITY] = (
                                        row[cidict[
                                            PHOTOMETRY.U_FLUX_DENSITY][pi]]
                                        if isinstance(cidict[
                                            PHOTOMETRY.
                                            U_FLUX_DENSITY], list) else
                                        row[cidict[PHOTOMETRY.U_FLUX_DENSITY]])
                                if photodict[
                                        PHOTOMETRY.U_FLUX_DENSITY] == '':
                                    photodict[
                                        PHOTOMETRY.U_FLUX_DENSITY] = 'µJy'
                                fd = (
                                    row[cidict[PHOTOMETRY.FLUX_DENSITY][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.FLUX_DENSITY], list) else
                                    row[cidict[PHOTOMETRY.FLUX_DENSITY]])
                                efd = (
                                    row[cidict[
                                        PHOTOMETRY.E_FLUX_DENSITY][pi]] if
                                    isinstance(cidict[
                                        PHOTOMETRY.E_FLUX_DENSITY], list) else
                                    row[cidict[PHOTOMETRY.E_FLUX_DENSITY]])

                                mult = Decimal('1')
                                ufd = photodict[PHOTOMETRY.U_FLUX_DENSITY]
                                if ufd.lower() in [
                                        'mjy', 'millijy', 'millijansky']:
                                    mult = Decimal('1e3')
                                elif ufd.lower() in ['jy', 'jansky']:
                                    mult = Decimal('1e6')

                                if '<' in fd:
                                    set_pd_mag_from_flux_density(
                                        photodict, efd=str(
                                            Decimal(fd.strip('<')) * mult))
                                else:
                                    set_pd_mag_from_flux_density(
                                        photodict, fd=Decimal(fd) * mult,
                                        efd=Decimal(efd) * mult)
                            if not len(sources):
                                if use_self_source is None:
                                    sopts = [
                                        ('Bibcode', 'b'), ('Last name', 'l')]
                                    if self._require_source:
                                        sel_str = 'must_select_source'
                                    else:
                                        sel_str = 'select_source'
                                    text = prt.text(sel_str)
                                    skind = prt.prompt(
                                        text, kind='option',
                                        options=sopts, default='b',
                                        none_string=(
                                            None if self._require_source else
                                            'Neither, tag MOSFiT as source'))
                                    if skind == 'b':
                                        rsource = {}
                                        bibcode = ''

                                        while len(bibcode) != 19:
                                            bibcode = prt.prompt(
                                                'bibcode',
                                                kind='string',
                                                allow_blank=False
                                            )
                                            bibcode = bibcode.strip()
                                            if (re.search(
                                                '[0-9]{4}..........[\.0-9]{4}'
                                                '[A-Za-z]', bibcode)
                                                    is None):
                                                bibcode = ''
                                        rsource[
                                            SOURCE.BIBCODE] = bibcode
                                        use_self_source = False
                                    elif skind == 'l':
                                        rsource = {}
                                        last_name = prt.prompt(
                                            'last_name', kind='string'
                                        )
                                        rsource[
                                            SOURCE.NAME] = (
                                                last_name.strip().title() +
                                                ' et al., in preparation')
                                        use_self_source = False
                                    elif skind == 'n':
                                        use_self_source = True

                                photodict[
                                    PHOTOMETRY.SOURCE] = entries[
                                        rname].add_source(**rsource)

                            if any([x in photodict.get(
                                    PHOTOMETRY.MAGNITUDE, '')
                                    for x in ['<', '>']]):
                                photodict[PHOTOMETRY.UPPER_LIMIT] = True
                                photodict[
                                    PHOTOMETRY.MAGNITUDE] = photodict[
                                        PHOTOMETRY.MAGNITUDE].strip('<>')

                            if '<' in photodict.get(PHOTOMETRY.COUNT_RATE, ''):
                                photodict[PHOTOMETRY.UPPER_LIMIT] = True
                                photodict[
                                    PHOTOMETRY.COUNT_RATE] = photodict[
                                        PHOTOMETRY.COUNT_RATE].strip('<')
                                if PHOTOMETRY.E_COUNT_RATE in photodict:
                                    del(photodict[PHOTOMETRY.E_COUNT_RATE])

                            if '<' in photodict.get(
                                    PHOTOMETRY.FLUX_DENSITY, ''):
                                photodict[PHOTOMETRY.UPPER_LIMIT] = True
                                photodict[
                                    PHOTOMETRY.FLUX_DENSITY] = photodict[
                                        PHOTOMETRY.FLUX_DENSITY].strip('<')
                                if PHOTOMETRY.E_FLUX_DENSITY in photodict:
                                    del(photodict[PHOTOMETRY.E_FLUX_DENSITY])

                            # Apply offset time if set.
                            if (PHOTOMETRY.TIME in photodict and
                                    toffset != Decimal('0')):
                                photodict[PHOTOMETRY.TIME] = str(
                                    Decimal(photodict[PHOTOMETRY.TIME]) +
                                    toffset)

                            # Skip entries for which key values are not
                            # expected type.
                            if not all([
                                is_number(photodict.get(x, ''))
                                for x in photodict.keys() if
                                (PHOTOMETRY.get_key_by_name(x).type ==
                                 KEY_TYPES.NUMERIC)]):
                                continue

                            # Skip placeholder values.
                            if float(photodict.get(
                                    PHOTOMETRY.MAGNITUDE, 0.0)) > 50.0:
                                continue

                            # Add system if specified by user.
                            if (self._system is not None and
                                    PHOTOMETRY.SYSTEM not in photodict):
                                photodict[PHOTOMETRY.SYSTEM] = self._system

                            # Remove keys not in the `PHOTOMETRY` class.
                            for key in list(photodict.keys()):
                                if key not in PHOTOMETRY.vals():
                                    del(photodict[key])

                            # Add the photometry.
                            entries[rname].add_photometry(
                                **photodict)

                    merge_with_existing = None
                    for ei, entry in enumerate(entries):
                        entries[entry].sanitize()
                        if os.path.isfile(new_events[ei]):
                            if merge_with_existing is None:
                                merge_with_existing = prt.prompt(
                                    'merge_with_existing', default='y')
                            if merge_with_existing:
                                existing = Entry.init_from_file(
                                    catalog=None,
                                    name=event_names[ei],
                                    path=new_events[ei],
                                    merge=False,
                                    pop_schema=False,
                                    ignore_keys=[ENTRY.MODELS],
                                    compare_to_existing=False)
                                Catalog().copy_entry_to_entry(
                                    existing, entries[entry])

                        oentry = entries[entry]._ordered(entries[entry])
                        entabbed_json_dump(
                            {entry: oentry}, open(new_events[ei], 'w'),
                            separators=(',', ':'))

                    self._converted.extend([
                        [event_names[x], new_events[x]]
                        for x in range(len(event_names))])

                new_event_list.extend(new_events)
                previous_file = event
            else:
                new_event_list.append(event)

        return new_event_list
Beispiel #4
0
    def load_data(self,
                  data,
                  event_name='',
                  smooth_times=-1,
                  extrapolate_time=0.0,
                  limit_fitting_mjds=False,
                  exclude_bands=[],
                  exclude_instruments=[],
                  exclude_systems=[],
                  exclude_sources=[],
                  exclude_kinds=[],
                  time_unit=None,
                  time_list=[],
                  band_list=[],
                  band_systems=[],
                  band_instruments=[],
                  band_bandsets=[],
                  band_sampling_points=25,
                  variance_for_each=[],
                  user_fixed_parameters=[],
                  user_released_parameters=[],
                  pool=None):
        """Load the data for the specified event.

        Fixes and releases the parameters requested by the user, passes
        `data` to every task of kind ``'data'`` in the call stack, runs the
        stack twice to initialize the modules, and, on the master process
        when a ``'photometry'`` module exists, prints a summary of the bands
        and single-frequency instruments that were found.

        The list-valued defaults are only read and forwarded by this method;
        they are never modified here.

        Parameters
        ----------
        data
            Passed unchanged to `set_data` of each ``'data'`` task.
        event_name
            Handed to every module via `set_event_name`.
        smooth_times, extrapolate_time, limit_fitting_mjds, exclude_bands,
        exclude_instruments, exclude_systems, exclude_sources, exclude_kinds,
        time_unit, time_list, band_list, band_systems, band_instruments,
        band_bandsets
            Forwarded as keyword arguments to `set_data` of each ``'data'``
            task. `exclude_kinds` additionally selects which warning is
            shown for unsupported kinds.
        band_sampling_points
            Passed to `set_data` of each ``'sed'`` task.
        variance_for_each
            Passed to `adjust_fixed_parameters`.
        user_fixed_parameters
            Task names or task classes to fix. An entry that is immediately
            followed by a number is fixed at that number.
        user_released_parameters
            Task names or task classes to release.
        pool
            If not `None`, replaces `self._pool` and the printer's pool.

        Returns
        -------
        bool
            `False` as soon as a ``'data'`` task's `set_data` reports
            failure, `True` otherwise.
        """
        if pool is not None:
            self._pool = pool
            self._printer._pool = pool

        prt = self._printer

        prt.message('loading_data', inline=True)

        # Fix user-specified parameters.
        fixed_parameters = []
        released_parameters = []
        for task in self._call_stack:
            for fi, param in enumerate(user_fixed_parameters):
                if (task == param
                        or self._call_stack[task].get('class', '') == param):
                    fixed_parameters.append(task)
                    # A number directly after the parameter name is the value
                    # the parameter should be fixed at.
                    if fi < len(user_fixed_parameters) - 1 and is_number(
                            user_fixed_parameters[fi + 1]):
                        value = float(user_fixed_parameters[fi + 1])
                        if value not in self._call_stack:
                            self._call_stack[task]['value'] = value
                    # A fixed parameter no longer has a range.
                    if 'min_value' in self._call_stack[task]:
                        del self._call_stack[task]['min_value']
                    if 'max_value' in self._call_stack[task]:
                        del self._call_stack[task]['max_value']
                    self._modules[task].fix_value(
                        self._call_stack[task]['value'])
            for param in user_released_parameters:
                if (task == param
                        or self._call_stack[task].get('class', '') == param):
                    released_parameters.append(task)

        self.determine_free_parameters(fixed_parameters, released_parameters)

        for ti, task in enumerate(self._call_stack):
            cur_task = self._call_stack[task]
            self._modules[task].set_event_name(event_name)
            new_per = np.round(100.0 * float(ti) / len(self._call_stack))
            prt.message('loading_task', [task, new_per], inline=True)
            self._kinds_supported |= set(cur_task.get('supports', []))
            if cur_task['kind'] == 'data':
                success = self._modules[task].set_data(
                    data,
                    req_key_values=OrderedDict(
                        (('band', self._bands), ('instrument',
                                                 self._instruments),
                         ('telescope', self._telescopes))),
                    subtract_minimum_keys=['times'],
                    smooth_times=smooth_times,
                    extrapolate_time=extrapolate_time,
                    limit_fitting_mjds=limit_fitting_mjds,
                    exclude_bands=exclude_bands,
                    exclude_instruments=exclude_instruments,
                    exclude_systems=exclude_systems,
                    exclude_sources=exclude_sources,
                    exclude_kinds=exclude_kinds,
                    time_unit=time_unit,
                    time_list=time_list,
                    band_list=band_list,
                    band_systems=band_systems,
                    band_instruments=band_instruments,
                    band_bandsets=band_bandsets)
                if not success:
                    return False
                fixed_parameters.extend(
                    self._modules[task].get_data_determined_parameters())
            elif cur_task['kind'] == 'sed':
                self._modules[task].set_data(band_sampling_points)
            self._kinds_needed |= self._modules[task]._kinds_needed

        # Find unsupported wavebands and report to user.
        unsupported_kinds = self._kinds_needed - self._kinds_supported
        if unsupported_kinds:
            prt.message('using_unsupported_kinds' if 'none' in exclude_kinds
                        else 'ignoring_unsupported_kinds',
                        [', '.join(sorted(unsupported_kinds))],
                        warning=True)

        # Determine free parameters again as setting data may have fixed some
        # more.
        self.determine_free_parameters(fixed_parameters, released_parameters)

        self.exchange_requests()

        prt.message('finding_bands', inline=True)

        # Run through once to set all inits.
        for root in ['output', 'objective']:
            outputs = self.run_stack(
                [0.0 for x in range(self._num_free_parameters)], root=root)

        # Create any data-dependent free parameters.
        self.adjust_fixed_parameters(variance_for_each, outputs)

        # Determine free parameters again as above may have changed them.
        self.determine_free_parameters(fixed_parameters, released_parameters)

        self.determine_number_of_measurements()

        self.exchange_requests()

        # Reset modules
        for task in self._call_stack:
            self._modules[task].reset_preprocessed(['photometry'])

        # Run through inits once more.
        for root in ['output', 'objective']:
            outputs = self.run_stack(
                [0.0 for x in range(self._num_free_parameters)], root=root)

        # Collect observed band info
        if self._pool.is_master() and 'photometry' in self._modules:
            band_indices = outputs['all_band_indices']
            # Index -1 marks observations without a band; those are listed
            # separately as single-frequency instruments below.
            bis = [b for b in sorted(set(band_indices)) if b != -1]

            # Only build the band table when at least one band exists:
            # `max` of an empty sequence raises ValueError.
            if bis:
                prt.message('bands_used')
                # For each band, whether any observation in it is flagged
                # as observed.
                ois = [
                    any(y for x, y in zip(band_indices, outputs['observed'])
                        if x == bi)
                    for bi in bis
                ]
                filts = self._modules['photometry']
                ubs = filts._unique_bands
                band_len = max(len(ubs[bi]['origin']) for bi in bis)
                filterarr = [
                    (ubs[bi]['systems'], ubs[bi]['bandsets'],
                     filts._average_wavelengths[bi],
                     filts._band_offsets[bi], filts._band_kinds[bi],
                     filts._band_names[bi], oi, bi)
                    for bi, oi in zip(bis, ois)
                ]
                filterrows = []
                for s in sorted(filterarr):
                    details = ', '.join(filter(None, (
                        'Bandset: ' + s[1] if s[1] else '',
                        'System: ' + s[0] if s[0] else '',
                        'AB offset: ' + pretty_num(s[3]) if
                        (s[4] == 'magnitude' and s[0] != 'AB') else '')))
                    # Unobserved bands are marked with '*'; an empty detail
                    # list is dropped entirely.
                    filterrows.append(
                        (' ' + (' ' if s[-2] else '*') +
                         ubs[s[-1]]['origin'].ljust(band_len) +
                         ' [' + details + ']').replace(' []', ''))
                if not all(ois):
                    filterrows.append(prt.text('not_observed'))
                prt.prt('\n'.join(filterrows))

            single_freq_inst = list(
                sorted(
                    set(
                        np.array(outputs['instruments'])[np.array(
                            outputs['all_band_indices']) == -1])))

            if len(single_freq_inst):
                prt.message('single_freq')
            for inst in single_freq_inst:
                prt.prt('  {}'.format(inst))

            if ('unmatched_bands' in outputs
                    and 'unmatched_instruments' in outputs):
                prt.message('unmatched_obs', warning=True)
                prt.prt(', '.join([
                    '{} [{}]'.format(x[0], x[1])
                    if x[0] and x[1] else x[0] if not x[1] else x[1]
                    for x in list(
                        set(
                            zip(outputs['unmatched_bands'],
                                outputs['unmatched_instruments'])))
                ]),
                        warning=True,
                        prefix=False,
                        wrapped=True)

        return True