def test_table_to_ts(self):
        # Generate the data and the corresponding dates
        base = parse_time(datetime.datetime.today())
        times = base - TimeDelta(np.arange(24 * 60)*u.minute)
        intensity = u.Quantity(np.sin(np.arange(0, 12 * np.pi, ((12 * np.pi) / (24*60)))), u.W/u.m**2)

        # Create the units and meta objects
        units = OrderedDict([('intensity', u.W/u.m**2)])
        meta = MetaDict({'key':'value'})
        tbl_meta = MetaDict({'t_key':'t_value'})

        # Create a suitable mixin qtable
        table = Table([times, intensity], names=['time', 'intensity'], meta=tbl_meta)
        table.add_index('time')

        # Create TS from table and check
        ts_table = sunpy.timeseries.TimeSeries(table, meta, units)
        assert isinstance(ts_table, sunpy.timeseries.timeseriesbase.GenericTimeSeries)
        ts_table2 = sunpy.timeseries.TimeSeries(table, units, meta)
        assert (ts_table2 == ts_table)

        # Create TS using a tuple of values
        ts_table3 = sunpy.timeseries.TimeSeries((table, meta, units))
        assert isinstance(ts_table3, sunpy.timeseries.timeseriesbase.GenericTimeSeries)

        # ToDo: Try an incompatible table
        dual_index_table = Table([times, intensity], names=['time', 'intensity'], meta=tbl_meta)
        dual_index_table.add_index(('time', 'intensity'))
        with pytest.raises(ValueError):
            sunpy.timeseries.TimeSeries((dual_index_table, meta, units))
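For reference, the 'time' index added above also allows label-based row access on the table itself before it is handed to TimeSeries. A minimal sketch reusing the names from the test (this lookup is not part of the original test):

# Hypothetical: exact-match lookup against the indexed Time column.
row = table.loc[times[0]]
print(row['time'], row['intensity'])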
Example #3
def _get_fid_acq_stages():
    fid_acqs = Table.read("""

   warns score P2=0.0 P2=2.0 P2=2.5 P2=3.0 P2=4.0 P2=5.0 P2=6.0 P2=8.0 P2=99.0
   ----- ----- ------ ------ ------ ------ ------ ------ ------ ------ -------
       -     0      0    1.9    2.4    2.8    3.6    4.5    5.0    6.0     6.0
       Y     1      0    1.9    2.4    2.8    3.6    4.5    5.0    6.0     6.0
      YY     2      0    1.9    2.4   2.75    3.4    4.2    4.2    4.5     4.5
     YYY     3      0    1.9    2.4    2.7    3.2    3.5    3.5    3.5     3.5
       R     4      0    1.7    2.2    2.5    3.1    3.4    3.4    3.4     3.4
      RY     5      0    1.7    2.2    2.4    3.0    3.3    3.3    3.3     3.3
     RYY     6      0    1.7    2.2    2.3    2.5    2.5    2.5    2.5     2.5
      RR     8      0    1.5    2.0    2.0    2.0    2.0    2.0    2.0     2.0
     RRY     9      0    1.5    2.0    2.0    2.0    2.0    2.0    2.0     2.0
     RRR    12     -1   -1.0   -1.0   -1.0   -1.0   -1.0   -1.0   -1.0    -1.0

    """,
                          format='ascii.fixed_width_two_line')

    P2s = [
        float(name[3:]) for name in fid_acqs.colnames if name.startswith('P2=')
    ]
    funcs = []
    for fid_acq in fid_acqs:
        vals = [
            fid_acq[name] for name in fid_acqs.colnames
            if name.startswith('P2=')
        ]
        funcs.append(interp1d(P2s, vals))

    out = Table([fid_acqs['score'], funcs], names=['spoiler_score', 'min_P2'])
    out.add_index('spoiler_score')
    return out
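The table returned above is indexed on 'spoiler_score', so a row, and with it the stored interpolation function, can be pulled out with .loc. A minimal sketch of such a lookup, assuming _get_fid_acq_stages() is available in the current namespace (the score and P2 values below are illustrative only):

# Hypothetical lookup: interpolated minimum P2 for spoiler score 2.
stages = _get_fid_acq_stages()
min_p2_func = stages.loc[2]['min_P2']   # interp1d object stored in that row
print(float(min_p2_func(3.0)))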
Example #4
def get_site_table_single_query(from_date=None, ndays=5):
    if ndays > 100:
        ndays = 100
    sched = get_telsched(from_date=from_date, ndays=ndays, telnr=None)
    t = Table(names=[
        'Date', 'progID', 'Observers', 'PiFirstName', 'PiLastName', 'PiId'
    ] + site_list,
              dtype=['a10', 'a10', 'a100', 'a50', 'a50', 'i4'] +
              [int] * len(site_list))

    for prog in sched:
        if prog['Date'] not in list(t['Date']):
            row = {
                'Date': prog['Date'],
                'Observers': prog['Observers'],
                'progID': prog['ProjCode']
            }
            for site in site_list:
                row[site] = 0
            t.add_row(row)
        if prog['Location'] == 'CIT. Hirsch,CIT,UCB,CIT':
            tonights_sites = 'CIT,CIT,UCB,CIT'.split(',')
        else:
            tonights_sites = prog['Location'].split(',')
        tonights_observers = prog['Observers'].split(',')
        while len(tonights_sites) != len(tonights_observers):
            if len(tonights_sites) > len(tonights_observers):
                print(
                    f'{prog["Date"]}: N sites > N observers: removing last site'
                )
                tonights_sites.pop()
            elif len(tonights_sites) < len(tonights_observers):
                print(
                    f'{prog["Date"]}: N sites < N observers: adding site Other'
                )
                tonights_sites.append('Other')
        t.add_index('Date')
        rowid = t.loc_indices[prog['Date']]
        for entry in tonights_sites:
            if entry == 'CIT. Hirsch':
                print(tonights_sites)
                print('Correcting comma')
                entry = 'CIT, Hirsch'
            if entry in site_list:
                t[rowid][entry] += 1
            elif entry == 'Swin':
                t[rowid]['Swinburne'] += 1
            elif entry == 'Northwestern':
                t[rowid]['NU'] += 1
            elif entry == 'USCS':
                t[rowid]['UCSC'] += 1
            elif entry == 'NASA':
                t[rowid]['Other'] += 1
            elif entry == '':
                pass
            else:
                print(f'Unmatched entry: "{entry}"')

    return t
Example #5
def test_table_index_time_warning(engine):
    # Make sure that no ERFA warnings are emitted when indexing a table by
    # a Time column with a non-default time scale
    tab = Table()
    tab['a'] = Time([1, 2, 3], format='jyear', scale='tai')
    tab['b'] = [4, 3, 2]
    with warnings.catch_warnings(record=True) as wlist:
        tab.add_index(('a', 'b'), engine=engine)
    assert len(wlist) == 0
Example #6
def get_datasets(observatory):
    """
    Get a list of datasets for a given observatory.

    Parameters
    ----------
    observatory : `str`
        Observatory name.

    Returns
    -------
    `astropy.table.Table`

    Examples
    --------
    >>> from sunpy.net.cdaweb import get_datasets
    >>>
    >>> datasets = get_datasets('STEREOB') #doctest: +REMOTE_DATA
    >>> datasets['Id'] #doctest: +REMOTE_DATA
    <Column name='Id' dtype='str17' length=4>
        STB_LB_IMPACT
    STB_L1_IMPACT_HKP
           STB_L1_HET
     STB_L1_SWEA_SPEC
    >>> datasets.loc['STB_L1_SWEA_SPEC']['Label'] #doctest: +REMOTE_DATA
    'STEREO Behind IMPACT/SWEA Spectra - J. Luhmann (UCB/SSL)'
    >>> datasets.loc['STB_L1_SWEA_SPEC'][['Start', 'End']] #doctest: +REMOTE_DATA
    <Row index=3>
             Start                     End
             str24                    str24
    ------------------------ ------------------------
    2012-12-01T00:00:03.000Z 2013-12-31T23:59:41.000Z
    """
    # Get a list of files for a given dataset between start and end times
    url = '/'.join([
        _CDAS_BASEURL,
        'dataviews', _DATAVIEW,
        'datasets'
    ])
    url = f'{url}?observatory={observatory}'
    response = requests.get(url, headers=_CDAS_HEADERS)
    datasets = response.json()['DatasetDescription']

    ids = [dataset['Id'] for dataset in datasets]
    instruments = [', '.join(dataset['Instrument']) for dataset in datasets]
    labels = [dataset['Label'] for dataset in datasets]
    stimes = [dataset['TimeInterval']['Start'] for dataset in datasets]
    etimes = [dataset['TimeInterval']['End'] for dataset in datasets]

    t = Table([ids, instruments, labels, stimes, etimes],
              names=['Id', 'Instruments', 'Label', 'Start', 'End'])
    t.add_index('Id')
    return t
Example #7
    def test_invalid_updates(self, main_col, table_types, engine):
        # using .loc and .loc_indices with a value not present should raise an exception
        self._setup(main_col, table_types)
        t = Table([[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6]],
                  names=('a', 'b', 'c'), meta={'name': 'first table'})

        t.add_index('a')
        with pytest.raises(ValueError):
            t.loc[3] = [[1, 2, 3]]
        with pytest.raises(ValueError):
            t.loc[[1, 4, 2]] = [[1, 2, 3], [4, 5, 6]]
        with pytest.raises(ValueError):
            t.loc[[1, 4, 2]] = [[1, 2, 3], [4, 5, 6], [2, 3]]
        with pytest.raises(ValueError):
            t.loc[[1, 4, 2]] = [[1, 2, 3], [4, 5], [2, 3]]
Example #8
def get_observatory_groups():
    """
    Get a list of observatory IDs for each observatory in CDAWeb.

    An observatory group is typically a single mission, which can contain
    multiple observatories, e.g. for the STEREO observatory group there are two
    observatories, STEREO-A and STEREO-B.

    Returns
    -------
    `astropy.table.Table`

    Examples
    --------
    >>> from sunpy.net.cdaweb import get_observatory_groups
    >>>
    >>> groups = get_observatory_groups() #doctest: +REMOTE_DATA
    >>> groups['Group'] #doctest: +REMOTE_DATA
        <Column name='Group' dtype='str55' length=75>
                        ACE
                      AMPTE
        ...
                    Voyager
                       Wind
    >>> groups.loc['STEREO'] #doctest: +REMOTE_DATA
    <Row index=62>
    Group                                  Observatories
    str55                                      str518
    ------ -----------------------------------------------------------------------------
    STEREO 'Ahead', 'Behind', 'STA', 'STB', 'STEREO', 'STEREOA', 'STEREOB', 'sta', 'stb'
    """
    # Get a list of files for a given dataset between start and end times
    url = '/'.join(
        [_CDAS_BASEURL, 'dataviews', _DATAVIEW, 'observatoryGroups'])
    response = requests.get(url, headers=_CDAS_HEADERS)
    obs_groups = response.json()

    names = [obs['Name'] for obs in obs_groups['ObservatoryGroupDescription']]
    obs_ids = [
        obs['ObservatoryId']
        for obs in obs_groups['ObservatoryGroupDescription']
    ]
    # Join all IDs into a single string
    obs_ids = ["'" + "', '".join(id) + "'" for id in obs_ids]

    t = Table([names, obs_ids], names=['Group', 'Observatories'])
    t.add_index('Group')
    return t
Example #9
    def test_updating_row_byindex(self, main_col, table_types, engine):
        self._setup(main_col, table_types)
        t = Table([['a', 'b', 'c', 'd'], [2, 3, 4, 5], [3, 4, 5, 6]],
                  names=('a', 'b', 'c'), meta={'name': 'first table'})

        t.add_index('a', engine=engine)
        t.add_index('b', engine=engine)

        t.loc['c'] = ['g', 40, 50]  # single label, with primary key 'a'
        t2 = t[2]
        assert list(t2) == ['g', 40, 50]

        # list search
        t.loc[['a', 'd', 'b']] = [['a', 20, 30], ['d', 50, 60], ['b', 30, 40]]
        t2 = [['a', 20, 30], ['d', 50, 60], ['b', 30, 40]]
        for i, p in zip(t2, [1, 4, 2]):  # same order as input list
            assert list(t[p - 1]) == i
Example #10
def sortBf(data, model):
    if model=="b2s1_ode":
        col_names = ['branch','stability','p','L2','b1','b2','s']
#        stability = data[1]>=0
        data_cols = [data[0],data[1],data[4],data[5],data[6],data[7],data[8]]
        t = Table(data_cols, names=col_names
                  ,meta={'name': model+' ODE AUTO bifurcation diagram'}
                  ,dtype=('i4', 'i4','f8','f8','f8','f8','f8'))
        t.add_index('branch')
    elif model=="b2s2":
        col_names = ['branch','stability','p','L2','b1','b2','s1','s2']
#        stability = data[1]>=0
        data_cols = [data[0],data[1],data[4],data[5],data[6],data[7],data[8],data[9]]
        t = Table(data_cols, names=col_names
                  ,meta={'name': model+' ODE AUTO bifurcation diagram'}
                  ,dtype=('i4', 'i4','f8','f8','f8','f8','f8','f8'))
        t.add_index('branch')
    return t
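Because the returned table is indexed on 'branch', individual bifurcation branches can be selected with .loc. A minimal sketch with fabricated input data (the array layout below only mimics the 'b2s1_ode' column positions and is not real AUTO output):

import numpy as np

# Fabricated column data: index 0 is branch, index 1 is stability,
# indices 2-8 are filler float columns; sortBf reads columns 4-8.
rng = np.random.default_rng(0)
data = [np.repeat([1, 2], 5), np.tile([1, -1], 5)] + \
       [rng.random(10) for _ in range(7)]

t = sortBf(data, model='b2s1_ode')
branch1 = t.loc[1]          # all rows belonging to branch 1
print(len(branch1), branch1.colnames)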
Example #11
def table_ts():
    # Generate the data and the corresponding dates
    base = parse_time(datetime.datetime.today())
    times = base - TimeDelta(np.arange(24 * 60)*u.minute)
    intensity = u.Quantity(
        np.sin(np.arange(0, 12 * np.pi, ((12 * np.pi) / (24 * 60)))), u.W / u.m ** 2)

    # Create the units and meta objects
    units = OrderedDict([('intensity', u.W / u.m**2)])
    meta = MetaDict({'key': 'value'})
    tbl_meta = MetaDict({'t_key': 't_value'})

    # Create a suitable mixin qtable
    table = Table(
        [times, intensity], names=['time', 'intensity'], meta=tbl_meta)
    table.add_index('time')

    # Create TS from table and check
    return sunpy.timeseries.TimeSeries(table, meta, units)
Example #13
    def to_table(self, format=".6e"):
        """Convert covariance matrix to table

        Parameters
        ----------
        format : str
            Column format string

        Returns
        -------
        table : `~astropy.table.Table`
            Covariance table
        """
        table = Table()
        table["name"] = self.parameters.names

        for idx, par in enumerate(self.parameters):
            vals = self.data[idx]
            table[par.name] = vals
            table[par.name].format = format

        table.add_index("name")
        return table
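Since the table built above is indexed on "name", a single parameter's covariance row can then be retrieved by label. A minimal, hypothetical sketch, assuming `covariance` is an instance of the surrounding class and that it has a parameter named "amplitude":

# Hypothetical lookup via the "name" index; the parameter name is illustrative.
table = covariance.to_table()
row = table.loc["amplitude"]
print(row["amplitude"])   # variance entry of that parameter's row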
Example #14
def test_get_index():
    a = [1, 4, 5, 2, 7, 4, 45]
    b = [2.0, 5.0, 8.2, 3.7, 4.3, 6.5, 3.3]
    t = Table([a, b], names=('a', 'b'), meta={'name': 'first table'})
    t.add_index(['a'])
    # Getting the values of index using names
    x1 = get_index(t, names=['a'])

    assert isinstance(x1, SlicedIndex)
    assert len(x1.columns) == 1
    assert len(x1.columns[0]) == 7
    assert x1.columns[0].info.name == 'a'
    # Getting the values of index using table_copy
    x2 = get_index(t, table_copy=t[['a']])

    assert isinstance(x2, SlicedIndex)
    assert len(x2.columns) == 1
    assert len(x2.columns[0]) == 7
    assert x2.columns[0].info.name == 'a'

    with pytest.raises(ValueError):
        get_index(t, names=['a'], table_copy=t[['a']])
    with pytest.raises(ValueError):
        get_index(t, names=None, table_copy=None)
Example #15
    lcat = 'WENSS'

    if 'WENSS' in labels:
        nu = [pardict[hcat][0], pardict[lcat][0]]
        s = [pardict[hcat][1], pardict[lcat][1]]
        e_s = [pardict[hcat][2], pardict[lcat][2]]
        if ~np.isnan(s).any():
            sp, e_sp = dquants.two_pt_alpha(nu, s, e_s)
            al_calcs[ii]['al_l_WENSS_NVSS', 'e_al_l_WENSS_NVSS'] = sp, e_sp
            lcol = lcat + '_Limit'
            if lcol in sp_row.colnames:
                if sp_row[lcol] == 'U':
                    al_calcs[ii]['l_al_l_WENSS_NVSS'] = True

al_calcs = join(al_calcs, sp_class, keys='Source_name', join_type='left')
al_calcs.add_index('Source_name')

sed_fit_params = Table(
    names=('Source_name', 'SED_Class', 'al_10_1.4', 'e_al_10_1.4',
           'l_al_10_1.4', 'al_1.4_.15', 'e_al_1.4_.15', 'l_al_1.4_.15', 'nu_p',
           'e_nu_p', 'l_nu_p', 's_p', 'e_s_p', 'q', 'e_q', 'al_highc',
           'e_al_highc', 'l_al_highc', 'al_lowc', 'e_al_lowc', 'l_al_lowc',
           'quality', 'ccode', 's0_highc', 'e_s0_highc', 's0_lowc',
           'e_s0_lowc', 'e_nu_p_eh', 'e_nu_p_el', 'e_s_p_eh', 'e_s_p_el'),
    dtype=('S8', 'S3', 'f8', 'f8', 'bool', 'f8', 'f8', 'bool', 'f8', 'f8',
           'bool', 'f8', 'f8', 'f8', 'f8', 'f8', 'f8', 'bool', 'f8', 'f8',
           'bool', 'S2', 'S1', 'f8', 'f8', 'f8', 'f8', 'f8', 'f8', 'f8', 'f8'))

fit_results_ext_tab.add_index('source')

for row in final_unq_jvla:
Example #16
def main(name,version,HSTband,scalepc):
    '''match nebulae and association catalogue

    '''

    print(f'parameters: {name} {version} {HSTband} {scalepc}')


    # =====================================================================
    # Read in the data
    # =====================================================================

    #p = {x:sample_table.loc[name][x] for x in sample_table.columns}

    # DAP linemaps (Halpha and OIII)
    filename = data_ext / 'MUSE' / 'DR2.1' / 'copt' / 'MUSEDAP'
    filename = [x for x in filename.iterdir() if x.stem.startswith(name)][0]

    with fits.open(filename) as hdul:
        Halpha = NDData(data=hdul['HA6562_FLUX'].data,
                        uncertainty=StdDevUncertainty(hdul['HA6562_FLUX_ERR'].data),
                        mask=np.isnan(hdul['HA6562_FLUX'].data),
                        meta=hdul['HA6562_FLUX'].header,
                        wcs=WCS(hdul['HA6562_FLUX'].header))
        OIII = NDData(data=hdul['OIII5006_FLUX'].data,
                        uncertainty=StdDevUncertainty(hdul['OIII5006_FLUX_ERR'].data),
                        mask=np.isnan(hdul['OIII5006_FLUX'].data),
                        meta=hdul['OIII5006_FLUX'].header,
                        wcs=WCS(hdul['OIII5006_FLUX'].header))

    # the original catalogue from Francesco
    with fits.open(nebulae_file) as hdul:
        nebulae = Table(hdul[1].data)
    nebulae['SkyCoord'] = SkyCoord(nebulae['cen_ra']*u.deg,nebulae['cen_dec']*u.deg,frame='icrs')

    '''
    with fits.open(basedir/'data'/'interim'/f'Nebulae_Catalogue_v2p1_dig.fits') as hdul:
        dig = Table(hdul[1].data)

    with fits.open(basedir/'data'/'interim'/f'Nebulae_Catalogue_v2p1_fuv.fits') as hdul:
        fuv = Table(hdul[1].data)

    with fits.open(basedir/'data'/'interim'/f'Nebulae_Catalogue_v2p1_eq.fits') as hdul:
        eq_width = Table(hdul[1].data)

    nebulae = join(nebulae,fuv,keys=['gal_name','region_ID'])
    nebulae = join(nebulae,eq_width,keys=['gal_name','region_ID'])
    nebulae = join(nebulae,dig,keys=['gal_name','region_ID'])'
    '''

    nebulae.rename_columns(['cen_x','cen_y'],['x','y'])

    with np.errstate(divide='ignore',invalid='ignore'):
        nebulae['[SIII]/[SII]'] = np.nan
        SII = nebulae['SII6716_FLUX_CORR']+nebulae['SII6730_FLUX_CORR']
        SIII = nebulae['SIII6312_FLUX_CORR']+nebulae['SIII9068_FLUX_CORR']
        nebulae['[SIII]/[SII]'][SII>0] = SIII[SII>0]/SII[SII>0]  # index the column first so the assignment is not made on a copy
        #nebulae['HA/FUV'] = nebulae['HA6562_FLUX_CORR']/nebulae['FUV_FLUX_CORR']
        #nebulae['HA/FUV_err'] = nebulae['HA/FUV']*np.sqrt((nebulae['HA6562_FLUX_CORR_ERR']/nebulae['HA6562_FLUX_CORR'])**2+(nebulae['FUV_FLUX_CORR_ERR']/nebulae['FUV_FLUX_CORR'])**2)

    nebulae = nebulae[nebulae['gal_name']==name]
    nebulae.add_index('region_ID')

    filename = data_ext / 'Products' / 'Nebulae_catalogs'/'Nebulae_catalogue_v2' /'spatial_masks'/f'{name}_nebulae_mask_V2.fits'
    with fits.open(filename) as hdul:
        nebulae_mask = NDData(hdul[0].data.astype(float),mask=Halpha.mask,meta=hdul[0].header,wcs=WCS(hdul[0].header))
        nebulae_mask.data[nebulae_mask.data==-1] = np.nan

    #print(f'{name}: {len(nebulae)} HII-regions in final catalogue')

    # the association catalogue and mask
    target  = name.lower()
    associations, associations_mask = read_associations(folder=association_folder,
                                                        target=target,scalepc=scalepc,
                                                        HSTband=HSTband,version=version,data='all')
    if not associations:
        return 0
    
    # environmental masks
    with fits.open(env_masks_folder / f'{name}_simple.fits') as hdul:
        mask = reproject_interp(hdul[0],Halpha.meta,order='nearest-neighbor',return_footprint=False)
        env_masks_neb = NDData(data=mask,
                           meta=hdul[0].header,
                           wcs=Halpha.wcs)
    
    #print(f'{name}: {len(associations)} associations in catalogue')

    # =====================================================================
    # reproject and match catalogues
    # =====================================================================

    nebulae_hst, _  = reproject_interp(nebulae_mask,
                                    output_projection=associations_mask.wcs,
                                    shape_out=associations_mask.data.shape,
                                    order='nearest-neighbor')    

    # we scale the associations such that the id ends up in the decimal part
    scale = 10**np.ceil(np.log10(max(associations_mask.data[~np.isnan(associations_mask.data)])))
    s_arr = associations_mask.data/scale+nebulae_hst

    #print(f'masks reprojected')

    # ids of associations, nebulae and combination (sum) of both
    a_id = np.unique(associations_mask.data[~np.isnan(associations_mask.data)]).astype(int)
    n_id = np.unique(nebulae_mask.data[~np.isnan(nebulae_mask.data)]).astype(int)
    s_id = np.unique(s_arr[~np.isnan(s_arr)])

    # this splits the sum into two parts (nebulae and associations)
    a_modf,n_modf = np.modf(s_id)
    n_modf = n_modf.astype(int)
    a_modf = np.round(a_modf*scale).astype(int)

    unique_a, count_a = np.unique(a_modf,return_counts=True)
    unique_n, count_n = np.unique(n_modf,return_counts=True)

    nebulae_dict = {int(n) : a_modf[n_modf==n].tolist() for n in n_id}     
    associations_dict = {int(a) : n_modf[a_modf==a].tolist() for a in a_id}     


    # so far we ensured that the nebulae in unique_n have only one association,
    # but it is possible that this association extends beyond the nebula and into
    # a second nebula. Those objects are excluded here
    isolated_nebulae = set()
    isolated_assoc   = set()
    for n,v in nebulae_dict.items():
        if len(v)==1:
            if len(associations_dict[v[0]])==1:
                isolated_nebulae.add(n)
                isolated_assoc.add(v[0])

    #print(f'n_associations = {len(associations_dict)}')
    #print(f'n_nebulae      = {len(nebulae_dict)}')
    #print(f'1to1 match     = {len(isolated_nebulae)}')


    # we save those two dicts so we do not have to redo this every time
    with open(basedir/version/HSTband/f'{scalepc}pc'/f'{name}_{HSTband}_{scalepc}pc_nebulae.yml','w+') as f:
        yaml.dump(nebulae_dict,f)
    with open(basedir/version/HSTband/f'{scalepc}pc'/f'{name}_{HSTband}_{scalepc}pc_associations.yml','w+') as f:
        yaml.dump(associations_dict,f)


    # find all assoc that have at least one pixel outside of the nebulae masks
    mask = associations_mask.data.copy()
    mask[~np.isnan(nebulae_hst)] = np.nan
    outside = np.unique(mask[~np.isnan(mask)].astype(int))

    # find all assoc that have at least one pixel inside of the nebulae masks
    mask = associations_mask.data.copy()
    mask[np.isnan(nebulae_hst)] = np.nan
    inside = np.unique(mask[~np.isnan(mask)].astype(int))

    contained = np.setdiff1d(inside,outside)
    partial   = np.intersect1d(inside,outside)
    isolated  = np.setdiff1d(outside,inside)

    #print(f'contained: {len(contained)}\npartial: {len(partial)}\nisolated: {len(isolated)}')

    assoc_tmp = associations[['assoc_ID']].copy()
    assoc_tmp.add_index('assoc_ID')

    x_asc,y_asc = associations['SkyCoord'].to_pixel(env_masks_neb.wcs)
    outside = (x_asc > env_masks_neb.data.shape[1]) | (y_asc > env_masks_neb.data.shape[0])
    x_asc[outside] = 0
    y_asc[outside] = 0
    assoc_tmp['env_asc'] = [environment_dict[env_masks_neb.data[y,x]] for 
                            x,y in zip(x_asc.astype(int),y_asc.astype(int))]
    assoc_tmp['env_asc'][outside] = ''  # index the column first so the assignment is not made on a copy
    
    assoc_tmp['overlap'] = np.empty(len(associations),dtype='U9')
    assoc_tmp['overlap'][np.isin(assoc_tmp['assoc_ID'],contained)] = 'contained'
    assoc_tmp['overlap'][np.isin(assoc_tmp['assoc_ID'],partial)]   = 'partial'
    assoc_tmp['overlap'][np.isin(assoc_tmp['assoc_ID'],isolated)]  = 'isolated'
    assoc_tmp['1to1'] = False
    assoc_tmp['1to1'][np.isin(assoc_tmp['assoc_ID'],list(isolated_assoc))] = True
    assoc_tmp['Nnebulae'] = [len(associations_dict[k]) for k in assoc_tmp['assoc_ID']]

    assoc_tmp['region_ID'] = np.nan
    assoc_tmp['region_ID'][assoc_tmp['1to1']] = [associations_dict[k][0] for k in assoc_tmp[assoc_tmp['1to1']]['assoc_ID']]

    overlap = join(
        Table(np.unique(associations_mask.data[~np.isnan(associations_mask.data)],return_counts=True),names=['assoc_ID','size']),
        Table(np.unique(associations_mask.data[~np.isnan(nebulae_hst) & ~np.isnan(associations_mask.data)],return_counts=True),names=['assoc_ID','overlap_size']),
        keys=['assoc_ID'],join_type='outer')
    overlap = overlap.filled(0)
    overlap['overlap_asc'] = overlap['overlap_size']/overlap['size']
    overlap['overlap_asc'].info.format = '%.2f'
    assoc_tmp = join(assoc_tmp,overlap[['assoc_ID','overlap_asc']],keys='assoc_ID')

    #print('write to file')
    hdu = fits.BinTableHDU(assoc_tmp,name='joined catalogue')
    hdu.writeto(basedir/version/HSTband/f'{scalepc}pc'/f'{name}_{HSTband}_{scalepc}pc_associations.fits',overwrite=True)


    nebulae_tmp = nebulae[['region_ID','x','y']].copy()
    nebulae_tmp.add_index('region_ID')

    nebulae_tmp['env_neb'] = [environment_dict[env_masks_neb.data[y,x]] for 
                              x,y in zip(nebulae_tmp['x'].astype(int),nebulae_tmp['y'].astype(int))]

    nebulae_tmp['neighbors'] = np.nan
    for row in nebulae_tmp:
        row['neighbors'] = len(find_neighbors(nebulae_mask.data,tuple(row[['x','y']]),row['region_ID'],plot=False))
    del nebulae_tmp[['x','y']]

    nebulae_tmp['1to1'] = False
    nebulae_tmp['1to1'][np.isin(nebulae_tmp['region_ID'],list(isolated_nebulae))] = True
    nebulae_tmp['Nassoc'] = [len(nebulae_dict[k]) for k in nebulae_tmp['region_ID']]
    nebulae_tmp['assoc_ID'] = np.nan
    nebulae_tmp['assoc_ID'][nebulae_tmp['1to1']] = [nebulae_dict[k][0] for k in nebulae_tmp[nebulae_tmp['1to1']]['region_ID']]


    overlap = join(
        Table(np.unique(nebulae_hst[~np.isnan(nebulae_hst)],return_counts=True),names=['region_ID','size']),
        Table(np.unique(nebulae_hst[~np.isnan(nebulae_hst) & ~np.isnan(associations_mask.data)],return_counts=True),names=['region_ID','overlap_size']),
        keys=['region_ID'],join_type='outer')
    overlap = overlap.filled(0)
    overlap['overlap_neb'] = overlap['overlap_size']/overlap['size']
    overlap['overlap_neb'].info.format = '%.2f'
    nebulae_tmp = join(nebulae_tmp,overlap[['region_ID','overlap_neb']],keys='region_ID')

    hdu = fits.BinTableHDU(nebulae_tmp,name='joined catalogue')
    hdu.writeto(basedir/version/HSTband/f'{scalepc}pc'/f'{name}_{HSTband}_{scalepc}pc_nebulae.fits',overwrite=True)
    #del nebulae_tmp['1to1']

    #print(f'{np.sum(nebulae_tmp["neighbors"]==0)} nebulae have no neighbors')

    catalogue = join(assoc_tmp,nebulae_tmp,keys=['assoc_ID','region_ID'])
    catalogue = join(catalogue,nebulae,keys='region_ID')
    catalogue = join(catalogue,associations,keys='assoc_ID')

    # pay attention to the order of assoc, neb
    catalogue.rename_columns(['X','Y','x','y','RA','DEC','cen_ra','cen_dec',
                              'reg_area','region_area',
                              'EBV_1','EBV_2','EBV_err','EBV_ERR',
                              'SkyCoord_1','SkyCoord_2'],
                             ['x_asc','y_asc','x_neb','y_neb','ra_asc','dec_asc','ra_neb','dec_neb',
                              'area_asc','area_neb',
                              'EBV_balmer','EBV_stars','EBV_balmer_err','EBV_stars_err',
                              'SkyCoord_asc','SkyCoord_neb'])

    # separation to other associations and nebulae
    idx,sep_asc,_= match_coordinates_sky(catalogue['SkyCoord_asc'],associations['SkyCoord'],nthneighbor=2)
    idx,sep_neb,_= match_coordinates_sky(catalogue['SkyCoord_neb'],nebulae['SkyCoord'],nthneighbor=2)
    catalogue['sep_asc'] = sep_asc.to(u.arcsec)
    catalogue['sep_neb'] = sep_neb.to(u.arcsec)

    # select the columns of the joined catalogue
    columns = ['assoc_ID','region_ID','x_asc','y_asc','x_neb','y_neb',
               'ra_asc','dec_asc','ra_neb','dec_neb','SkyCoord_asc','SkyCoord_neb',
               'env_asc','env_neb','area_asc','area_neb',
               'sep_asc','sep_neb','neighbors','Nassoc','overlap','overlap_asc','overlap_neb',
               'age','age_err','mass','mass_err','EBV_stars','EBV_stars_err','EBV_balmer','EBV_balmer_err',
               'met_scal','met_scal_err','logq_D91','logq_D91_err',] + \
                [x for x in nebulae.columns if x.endswith('_FLUX_CORR')] + \
                [x for x in nebulae.columns if x.endswith('_FLUX_CORR_ERR')] + \
                ['NUV_FLUX','NUV_FLUX_ERR','U_FLUX','U_FLUX_ERR','B_FLUX','B_FLUX_ERR',
                 'V_FLUX','V_FLUX_ERR','I_FLUX','I_FLUX_ERR'] 
    catalogue = catalogue[columns]
            
    catalogue.rename_columns([col for col in catalogue.columns if col.endswith('FLUX_CORR')],
                          [col.replace('FLUX_CORR','flux') for col in catalogue.columns if col.endswith('FLUX_CORR')])
    catalogue.rename_columns([col for col in catalogue.columns if col.endswith('FLUX_CORR_ERR')],
                          [col.replace('FLUX_CORR_ERR','flux_err') for col in catalogue.columns if col.endswith('FLUX_CORR_ERR')])
    catalogue['assoc_ID'] = catalogue['assoc_ID'].astype('int')
    catalogue['region_ID'] = catalogue['region_ID'].astype('int')

    catalogue.info.description = 'Joined catalogue between associations and nebulae'
    mean_sep = np.mean(catalogue['SkyCoord_asc'].separation(catalogue['SkyCoord_neb']))
    #print(f'{len(catalogue)} objects in catalogue')
    #print(f'the mean separation between cluster and association center is {mean_sep.to(u.arcsecond):.2f}')


    export = catalogue.copy() #[catalogue['contained']]
    #export.add_column(export['SkyCoord_asc'].to_string(style='hmsdms',precision=2),index=6,name='RaDec_asc')
    #export.add_column(export['SkyCoord_neb'].to_string(style='hmsdms',precision=2),index=8,name='RaDec_neb')

    RA_asc ,DEC_asc = zip(*[x.split(' ') for x in export['SkyCoord_asc'].to_string(style='hmsdms',precision=2)])
    RA_neb ,DEC_neb = zip(*[x.split(' ') for x in export['SkyCoord_neb'].to_string(style='hmsdms',precision=2)])

    export.add_column(RA_asc,index=6,name='Ra_asc')
    export.add_column(DEC_asc,index=8,name='Dec_asc')
    export.add_column(RA_neb,index=10,name='Ra_neb')
    export.add_column(DEC_neb,index=12,name='Dec_neb')

    for col in export.columns:
        if col not in ['Ra_asc','Dec_asc','Ra_neb','Dec_neb','region_ID','cluster_ID','overlap','env_asc','env_neb']:
            export[col].info.format = '%.2f'

    del export[['ra_asc','dec_asc','ra_neb','dec_neb','SkyCoord_neb','SkyCoord_asc']]

    hdu = fits.BinTableHDU(export,name='joined catalogue')
    hdu.writeto(basedir/version/HSTband/f'{scalepc}pc'/f'{name}_{HSTband}_{scalepc}pc_associations_and_nebulae_joined.fits',overwrite=True)
class ClusterFinder:
    '''
    Identify clusters in mock data.
    '''
    def __init__(self, galaxy, sort_key, d_proj, central_idx=None, \
                periodic=True, boxsize=0.0, max_r=3.0, background=None):
        '''
        Parameters
        ------------
        galaxy
            Input galaxy catalog. The first three columns should be 
            coordinates ['x', 'y', 'z'] and the catalog will be projected 
            along z-axis.
        sort_key : str
            Galaxies will be sorted according to sort_key of the catalog 
            for the initial percolation.
        d_proj : float
            Projection length. Cluster finder will see any galaxies within 
            d_c +- d_proj as members. It should use the same units as 
            coordinates.
        central_idx
            Index of central galaxies to be considered as potential 
            centers. If set to None, will consider all input galaxies.
        periodic : bool
            Whether the catalog has periodic boundary condition.
        boxsize : float
            Boxsize of the mock data. Used when dealing with periodic 
            boundary conditions.
        max_r : float
            Only galaxies within max_r of the central galaxy will be considered
            in identification.
        background : float
            Background galaxy density. If None, it is computed assuming
            V = boxsize**3.
        '''
        self.galaxy = Table(galaxy).copy()
        self.ng = len(self.galaxy)
        self.dproj = d_proj
        self.periodic = periodic
        self.boxsize = boxsize
        self.max_r = max_r
        self.sort_key = sort_key

        self.galaxy.add_columns([np.ones(self.ng, dtype=float), np.arange(self.ng)], \
                                names=['pfree', 'ID'])

        if central_idx is None:
            self.init_central = self.galaxy.copy()
        else:
            self.init_central = self.galaxy[central_idx].copy()
        self.init_central.remove_columns(['pfree'])
        self.init_central.add_column(np.zeros(len(self.init_central)),
                                     name='lambda')
        self.init_central.sort(sort_key, reverse=True)

        self.galaxy.add_index('ID')
        if background is None:
            self.background = self.ng / self.boxsize**3 * 2 * self.dproj
        else:
            self.background = background

    def initial_run(self, init_aperture=0.5):
        assigned = set()
        N_init_c = len(self.init_central)
        for i in range(N_init_c):
            print('Initial run... Finished:{}; Total:{}'.format(i, N_init_c),
                  end='\r')
            if self.init_central[i]['ID'] in assigned:
                continue
            temp_cluster = self.init_central[i]
            temp_galaxy = self.galaxy[self.galaxy['pfree'] != 0]
            temp_member = cylinder_filter(temp_galaxy, temp_cluster['pos'], \
                init_aperture, self.dproj, periodic=self.periodic, boxsize=self.boxsize)
            self.init_central['lambda'][i] = len(temp_member)
            assigned = assigned | set(temp_member['ID'])
        self.init_central = self.init_central[self.init_central['lambda'] >= 3]
        self.init_central.sort('lambda', reverse=True)
        self.init_central.add_index('ID')

    def percolation(self, compare_key=None):
        central_to_process = self.init_central.copy()
        # Identify the first cluster
        _central = central_to_process[0]
        f_cat, f_central = self.identify_single(_central, compare_key)
        f_central['cID'], f_cat['cID'] = 0, 0
        self.final_central = [f_central]
        self.final_member = [f_cat]
        tmem_gal = self.galaxy.loc[f_cat['ID']]
        tmem_gal['pfree'] = tmem_gal['pfree'] * (1 - f_cat['pmem'])
        central_to_process.remove_row(0)
        # Remove the potential centers having low pfree.
        low_pfree_id = tmem_gal[tmem_gal['pfree'] < 0.5]['ID']
        low_pfree_cluster = np.intersect1d(low_pfree_id,
                                           central_to_process['ID'])
        if len(low_pfree_cluster) > 0:
            low_pfree_cidx = central_to_process.loc_indices[low_pfree_cluster]
            central_to_process.remove_rows(low_pfree_cidx)

        # Loop over the list and do percolation.
        while len(central_to_process) > 0:
            N_central = len(self.final_central)
            print('Percolation... Finished:{}; Remaining:{}'.format(
                N_central, len(central_to_process)),
                  end='\r')
            _central = central_to_process[0]
            f_cat, f_central = self.identify_single(_central, compare_key)
            f_central['cID'], f_cat['cID'] = N_central, N_central
            self.final_central.append(f_central)
            self.final_member.append(f_cat)
            tmem_gal = self.galaxy.loc[f_cat['ID']]
            tmem_gal['pfree'] = tmem_gal['pfree'] * (1 - f_cat['pmem'])
            central_to_process.remove_row(0)

            low_pfree_id = tmem_gal[tmem_gal['pfree'] < 0.5]['ID']
            low_pfree_cluster = np.intersect1d(low_pfree_id,
                                               central_to_process['ID'])
            if len(low_pfree_cluster) > 0:
                low_pfree_cidx = central_to_process.loc_indices[
                    low_pfree_cluster]
                central_to_process.remove_rows(low_pfree_cidx)

    def identify_single(self, init_central, compare_key=None):
        if compare_key is None:
            compare_key = self.sort_key
        # Only consider galaxies close enough to the central.
        temp_central = Table(init_central)
        temp_cat = cylinder_filter(self.galaxy, temp_central['pos'],
                                   self.max_r, self.dproj)
        temp_cat.add_column(0.0, name='pmem')
        solver = lambda_solver(temp_cat['R'], temp_cat['pfree'],
                               self.background)
        # Galaxy must be outside of Rc if its pmem=0 and vice versa.
        temp_cat = temp_cat[solver['pmem'] > 0]
        # Iteration until the central galaxy is the most dominant one within Rc.
        while temp_cat[compare_key].max() != temp_central[compare_key]:
            temp_central = Table(temp_cat[temp_cat[compare_key].argmax()])
            temp_central.remove_columns(['pfree', 'R'])
            temp_central.add_column(0.0, name='lambda')
            temp_cat = cylinder_filter(self.galaxy, temp_central['pos'],
                                       self.max_r, self.dproj)
            temp_cat.add_column(0.0, name='pmem')
            solver = lambda_solver(temp_cat['R'], temp_cat['pfree'],
                                   self.background)
            temp_cat = temp_cat[solver['pmem'] > 0]

        temp_cat['pmem'] = solver['pmem'][solver['pmem'] > 0]
        temp_central['lambda'] = solver['lambda']
        temp_central.add_column(-1, name='cID')
        temp_cat.add_column(-1, name='cID')
        return temp_cat, temp_central
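A minimal, hypothetical driver for the class above, assuming `gals` is an astropy Table laid out the way the finder expects (the internals reference a 'pos' column and the chosen sort_key column; every name and number below is illustrative):

# Hypothetical usage of ClusterFinder; 'gals', 'mstar' and all numeric
# values are placeholders, not part of the original module.
finder = ClusterFinder(gals, sort_key='mstar', d_proj=30.0,
                       periodic=True, boxsize=500.0, max_r=3.0)
finder.initial_run(init_aperture=0.5)   # rank potential centrals
finder.percolation()                    # assign members, build clusters
centrals = finder.final_central         # per-cluster central galaxies
members = finder.final_member           # per-cluster member catalogues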
Example #18
class halo_props:
    '''
    Systematically analyse the halo X-ray properties based 
    on other modules.

    Attributes
    -----------
    datatype : str
        A copy of the input type of simulation data.
    catalogue_original : pynbody.halo.HaloCatalogue
        The input halo catalogue.
    length : int
        Length of the input catalogue.
    host_id_of_top_level
        How catalogue record "hostHalo" for those halos 
        without a host halo. Default is 0.
    errorlist : list
        Records cases where the host halo ID of a subhalo is not 
        present in the catalogue (this can occasionally happen with 
        AHF).
    rho_crit
        Critical density of the current snapshot in Msol kpc**-3.
    ovdens
        Virial overdensity factor :math:`\Delta_{vir}` of the current snapshot.
    dict : astropy.table.Table
        A copy of the halo.properties dictionary but in a table form
        to make future reference more convenient.
    haloid
        List of halo_id given by property dictionary.
    IDlist
        Table of halo_id and corresponding #ID given in the property 
        dictionary.
    hostid
        List of the halo_id of the host halo of each halo (originally 
        recorded in the property dictionary in the form of #ID).
    new_catalogue : dict
        The new catalogue which includes all the subhalo particles 
        in its host halo. The keys of the dictionary are the indexes of 
        halos in `catalogue_original`.
    prop
        Table of quantities corresponding to input field.
    host_list
        List of host halos.
    tophost
        halo_ids of the top-level host halo for each halo.
    children : list of sets
        Each set corresponds to the one-level down children of each halo.
    galaxy_list
        List of all galaxies (as long as n_star > 0).
    lumi_galaxy_list
        List of all luminous galaxies (self_m_star > galaxy_low_limit).
    galaxies : list of sets
        Each set corresponds to the embedded galaxies of each halo. 
        Subhalos are not considered and get an empty set. For host 
        halos the set includes all galaxies within them, including 
        galaxies actually embedded in a subhalo (i.e., the children 
        of the subhalo).
    lumi_galaxies
        Each set corresponds to the embedded luminous galaxies of each 
        halo. As with `galaxies`, only host halos are considered and 
        all luminous galaxies within them are included.
    n_lgal
        Total number of luminous galaxies embedded in each halo. Again, 
        only host halos are considered, and the galaxies within subhalos 
        (i.e., the subhalos themselves) are also counted.
    group_list
        halo_id of the halo identified as group in the catalogue.
    '''
    def __init__(self,
                 halocatalogue,
                 datatype,
                 field=default_field,
                 host_id_of_top_level=0,
                 verbose=True):
        '''
        Initialization routine.

        Input
        -----
        halocatalogue : pynbody.halo.HaloCatalogue
            Only has been tested for pynbody.halo.AHFCatalogue
        field
            Quantities to calculate. When changing specific_mass_field, 
            luminosity_field and temp_field, source codes must be modified.
        datatype : str
            What kind of simulation data you are dealing with. 
            Accepted datatype for now: 'gizmo_ahf' and 'tipsy_ahf'.
        host_id_of_top_level
            How catalogue record "hostHalo" for those halos 
            without a host halo. Default is 0.
        '''
        self.datatype = datatype
        self.catalogue_original = halocatalogue
        self.length = len(self.catalogue_original)
        init_zeros = np.zeros(self.length)
        self.host_id_of_top_level = host_id_of_top_level
        self.errorlist = [{}, {}, {}]
        self.verbose = verbose

        self.rho_crit = pnb.analysis.cosmology.rho_crit(
            f=self.catalogue_original[1], unit='Msol kpc**-3')
        self.ovdens = cosmology.Delta_vir(self.catalogue_original[1])

        self.dict = []
        k = 0
        for j in range(self.length):
            i = j + 1
            prop = self.catalogue_original[i].properties
            # currently pynbody can not load the *substructure files
            # generated by MPI AHF correctly, so here we remove
            # substructure info even if it can be successfully loaded
            # with the non-MPI AHF files.
            prop.pop('children', None)
            prop.pop('parentid', None)

            hid = prop['halo_id']
            if i != hid:
                raise Exception('Attention! halo_id doesn\'t equal i !!!')
            self.dict.append(prop)

            if ((i // 100) != (k // 100)) and self.verbose:
                print('Loading properties... {:7} / {}'.format(i, self.length),
                      end='\r')
            k = i

        self.dict = Table(self.dict)
        self.haloid = self.dict['halo_id']
        IDs = self.dict['#ID']
        self.ID_list = Table([IDs, self.haloid], names=['#ID', 'halo_id'])
        self.ID_list.add_row([host_id_of_top_level, host_id_of_top_level])

        self.ID_list.add_index('#ID')

        host_in_IDlist = np.isin(self.dict['hostHalo'], self.ID_list['#ID'])
        # Some hostHalo id will not be listed in #ID list, this is probably due to AHF algorithm
        in_idx, = np.where(host_in_IDlist)
        _not_in_ = np.invert(host_in_IDlist)
        not_in_idx, = np.where(_not_in_)
        self.hostid = np.zeros(self.length, dtype=int)  # np.int was removed in recent NumPy
        self.hostid[in_idx] = self.ID_list.loc[self.dict['hostHalo']
                                               [in_idx]]['halo_id']
        self.hostid = np.ma.array(self.hostid, dtype=int, mask=_not_in_)
        # loc method enables using #ID as index
        if len(not_in_idx) > 0:
            for error in not_in_idx:
                self.errorlist[0][
                    self.haloid[error]] = self.dict['hostHalo'][error]

        # prop initialization
        self.prop = {}
        for field_type in default_units:
            init_prop_table = [
                init_zeros for _ in range(len(field[field_type]))
            ]
            self.prop[field_type] = Table(init_prop_table,
                                          names=field[field_type])
            # astropy.table.Table is only used for generating a structured array more conveniently
            self.prop[field_type] = pnb.array.SimArray(
                self.prop[field_type], units=default_units[field_type])
        self.field = default_field
        self.field_units = default_units

        self._have_children = False
        self._have_galaxy = False
        self._have_group = False
        self._have_radii = False
        self._have_temp = False
        self._have_new_catalogue = False
        self._have_center = False

    def init_relationship(self,
                          galaxy_low_limit,
                          include_sub=False,
                          galaxy_mode='only stellar',
                          N_galaxy=3):
        '''
        Get basic information regarding groups, hosts, children, etc.

        Parameters
        ------------
        galaxy_low_limit : pynbody.array.SimArray
            Required by get_galaxy(). Limit above which galaxies will 
            be identified as luminous galaxies.
        include_sub
            Whether or not to include all the subhalo particles when 
            generating the new catalogue. See get_new_catalogue() for 
            details.
        N_galaxy : int
            Required by get_group_list(). Number of luminous galaxies 
            above which host halos are considered as groups.
        '''
        self.get_children()
        self.get_new_catalogue(include_=include_sub)
        self.get_galaxy(g_low_limit=galaxy_low_limit, mode=galaxy_mode)
        self.get_group_list(N_galaxy)
        self.get_center()

    def calcu_radii_masses(self,
                           halo_id_list=[],
                           rdict=None,
                           precision=1e-2,
                           rmax=None):
        '''
        Calculate radii (Rvir, R200, etc) and corresponding masses.

        Parameters
        -----------
        halo_id_list
            List of halo_ids to calculate radii and masses. 
            If set to empty list, then will use self.group_list.
        rdict : dict
            names and values for overdensity factors. Default is: 
            {'vir': self.ovdens, '200': 200, '500': 500, '2500': 2500}
        precision : float
            Precision for calculating the radius. See get_index() in 
            calculate_R.py documentation for details.
        rmax
            Maximum value for the shrinking sphere method. See 
            get_index() in calculate_R.py documentation for detail.
        '''
        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            if not self._have_group:
                raise Exception(
                    'Must get_group_list (or init_relationship) first!')
            halo_id_list = self.group_list
        if not self._have_center:
            raise Exception('Must get_center first!')

        if rdict == None:
            rdict = {'vir': self.ovdens, '200': 200, '500': 500, '2500': 2500}
        t1 = 0
        t2 = 0
        list_length = np.array(list(halo_id_list)).max()
        k = 0
        for j in halo_id_list:
            i = j - 1
            prop = self.dict[i]
            t1 = time.time()
            MassRadii = cR.get_radius(self.new_catalogue[j], \
                    overdensities=list(rdict.values()), rho_crit=self.rho_crit, \
                        prop=prop, precision=precision, cen=self.center[i], rmax=rmax)
            for key in rdict:
                self.prop['R'][key][i] = MassRadii[1][rdict[key]]
                self.prop['M'][key][i] = MassRadii[0][rdict[key]]
            t2 = time.time()
            if ((i // 100) != (k // 100)) and self.verbose:
                print('Calculating radii and masses... {:7} / {}, time: \
                        {:.5f}s'.format(j, list_length, t2 - t1),
                      end='\r')
            k = i
        self._have_radii = True

    def calcu_specific_masses(self, halo_id_list=[], \
                calcu_field=radii_to_cal_sepcific_mass, \
                    temp_cut='5e5 K', nh_cut='0.13 cm**-3'):
        '''
        Calculate some specific masses, such as baryon, IGrM, etc.

        Parameters
        -----------
        halo_id_list
            List of halo_ids to calculate masses. 
            If set to empty list, then will use self.group_list.
        calcu_field
            Radii to calculate specific masses within.
        temp_cut
            Temperature limit above which gas particles are 
            considered as hot.
        nh_cut
            nh limit above which gas particles are considered 
            as star forming.
        '''
        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            if not self._have_group:
                raise Exception(
                    'Must get_group_list (or init_relationship) first!')
            halo_id_list = self.group_list
        if not self._have_radii:
            raise Exception('Must get_radii_masses first!')

        list_length = np.array(list(halo_id_list)).max()
        k = 0
        for j in halo_id_list:
            i = j - 1

            prop = self.dict[i]
            center = self.center[i]
            halo = self.new_catalogue[j]
            tx = pnb.transformation.inverse_translate(halo, center)
            with tx:
                boxsize = halo.properties['boxsize'].in_units('kpc')
                original_pos = halo['pos'].copy()
                halo['pos'] = correct_pos(halo['pos'], boxsize)

                for r in calcu_field:
                    # Apply filters
                    subsim = halo[pnb.filt.Sphere(
                        self.prop['R'][i:i + 1][r].in_units('kpc'))]
                    cold_diffuse_gas = subsim.gas[pnb.filt.LowPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut)]
                    ISM = subsim.gas[pnb.filt.HighPass('nh', nh_cut)]
                    hot_diffuse_gas_ = subsim.gas[pnb.filt.HighPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut)]

                    # Calculate masses
                    self.prop['M']['star' + r][i] = subsim.star['mass'].sum()
                    self.prop['M']['gas' + r][i] = subsim.gas['mass'].sum()
                    self.prop['M']['bar' + r][i] = self.prop['M']['star' + r][i] \
                                + self.prop['M']['gas' + r][i]
                    self.prop['M']['ism' + r][i] = ISM['mass'].sum()
                    self.prop['M']['cold' + r][i] = cold_diffuse_gas['mass'].sum() \
                                + self.prop['M']['ism' + r][i]
                    self.prop['M']['igrm' +
                                   r][i] = hot_diffuse_gas_['mass'].sum()

                halo['pos'] = original_pos
            if ((i // 100) != (k // 100)) and self.verbose:
                print('Calculating specific masses... {:7} / {}'.format(
                    j, list_length),
                      end='\r')
            k = i

    def calcu_temp_lumi(self, cal_file, halo_id_list=[], \
                    core_corr_factor=0.15, calcu_field='500', \
                    temp_cut='5e5 K', nh_cut='0.13 cm**-3', \
                    additional_filt=None):
        '''
        Calculate all the temperatures and luminosities listed in
        temp_field and luminosity_field. 

        Parameters
        -----------
        cal_file
            Calibration file used for calculating Tspec.
        halo_id_list
            List of halo_ids to calculate temperatures 
            and luminosities. If set to empty list, then will use 
            self.group_list.
        core_corr_factor
            Inner radius for calculating core-corrected 
            temperatures. Gas particles within 
            (core_corr_factor*R, R) will be used for calculation.
        calcu_field
            Radius to calculate temperatures and luminosities 
            within. Must be in radius_field. Default: R_500.
        temp_cut
            Temperature limit above which gas particles are 
            considered as hot.
        nh_cut
            nh limit above which gas particles are considered 
            as star forming.
        additional_filt
            Any additional filter used to constrain the hot diffuse 
            gas we are investigating.
        '''
        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            if not self._have_group:
                raise Exception(
                    'Must get_group_list (or init_relationship) first!')
            halo_id_list = self.group_list
        if not self._have_radii:
            raise Exception('Must get_radii_masses first!')

        list_length = np.array(list(halo_id_list)).max()
        k = 0
        for j in halo_id_list:
            i = j - 1
            center = self.center[i]
            halo = self.new_catalogue[j]
            R = self.prop['R'][i:i + 1][calcu_field].in_units('kpc')
            tx = pnb.transformation.inverse_translate(halo, center)
            with tx:
                boxsize = halo.properties['boxsize'].in_units('kpc')
                original_pos = halo['pos'].copy()
                halo['pos'] = correct_pos(halo['pos'], boxsize)

                subsim = halo[pnb.filt.Sphere(R)]
                if additional_filt is None:
                    hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut)
                else:
                    hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut) & additional_filt
                hot_diffuse_gas_ = subsim.gas[hot_diffuse_filt]
                # cal_tweight can return the sum of weight_type at the same time.
                self.prop['T']['x'][i], self.prop['L']['x'][i] = \
                        cal_tweight(hot_diffuse_gas_, weight_type='Lx')
                self.prop['T']['x_cont'][i], self.prop['L']['x_cont'][i] = \
                        cal_tweight(hot_diffuse_gas_, weight_type='Lx_cont')
                self.prop['T']['mass'][i], _ = cal_tweight(hot_diffuse_gas_,
                                                           weight_type='mass')
                self.prop['T']['spec'][i] = pnb.array.SimArray(cal_tspec(hot_diffuse_gas_, \
                                cal_f=cal_file, datatype=self.datatype), units='keV')
                self.prop['T']['xb'][i], self.prop['L']['xb'][i] = \
                        cal_tweight(hot_diffuse_gas_, weight_type='Lxb')
                self.prop['L']['xb_cont'][i] = hot_diffuse_gas_[
                    'Lxb_cont'].sum()

                # Core-corrected temperatures:
                # Filter:
                corr_hot_ = hot_diffuse_gas_[~pnb.filt.
                                             Sphere(core_corr_factor * R)]

                self.prop['T']['spec_corr'][i] = pnb.array.SimArray(cal_tspec(corr_hot_, \
                                cal_f=cal_file, datatype=self.datatype), units='keV')
                self.prop['T']['x_corr'][i], self.prop['L']['x_corr'][
                    i] = cal_tweight(corr_hot_, weight_type='Lx')
                self.prop['T']['xb_corr'][i], self.prop['L']['xb_corr'][
                    i] = cal_tweight(corr_hot_, weight_type='Lxb')
                self.prop['T']['x_corr_cont'][i], _ = \
                                        cal_tweight(corr_hot_, weight_type='Lx_cont')
                self.prop['T']['mass_corr'][i], _ = cal_tweight(
                    corr_hot_, weight_type='mass')

                halo['pos'] = original_pos
            if ((i // 100) != (k // 100)) and self.verbose:
                print('Calculating temperatures and luminosities... {:7} / {}'\
                            .format(j, list_length), end='\r')
            k = i

        self._have_temp = True

    def calcu_entropy(self, cal_file, n_par=9, halo_id_list=[], \
                calcu_field=entropy_field, thickness=0.05, volume_type='full', \
                temp_cut='5e5 K', nh_cut='0.13 cm**-3', additional_filt=None):
        '''
        Calculate the entropy within a thin spherical shell 
        centered on each halo.

        Parameters
        -----------
        cal_file
            Calibration file used for calculating Tspec.
        n_par : int
            Minimum number of hot diffuse gas particles the shell must 
            contain; below this the entropy is not calculated (NaN is stored).
        halo_id_list
            List of halo_ids to calculate entropies for. 
            If set to an empty list, self.group_list is used.
        calcu_field
            Radii (keys of the radius table) at which to place the thin 
            shells used for calculating entropies.
        thickness : float
            Thickness divided by the radius of the spherical shell, i.e., 
            the shell spans R to (1+thickness)*R. 
        volume_type : str
            Volume used for calculating the average electron number 
            density. 'gas' uses the sum of the volumes of all hot diffuse 
            gas particles; 'full' uses the exact shell volume 
            4/3*pi*(((1+thickness)*R)^3 - R^3); 'full_approx' uses the 
            thin-shell approximation 4*pi*R^2*(thickness*R).
        temp_cut
            Temperature limit above which gas particles are 
            considered as hot.
        nh_cut
            nh limit above which gas particles are considered 
            as star forming.
        additional_filt
            Any additional filter used to constrain the hot diffuse 
            gas we are investigating.
        '''
        # thickness = pnb.array.SimArray(thickness, 'kpc')
        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            if not self._have_group:
                raise Exception(
                    'Must get_group_list (or init_relationship) first!')
            halo_id_list = self.group_list
        if not self._have_radii:
            raise Exception('Must get_radii_masses first!')

        list_length = np.array(list(halo_id_list)).max()
        k = 0
        for j in halo_id_list:
            i = j - 1
            center = self.center[i]
            halo = self.new_catalogue[j]
            tx = pnb.transformation.inverse_translate(halo, center)
            with tx:
                boxsize = halo.properties['boxsize'].in_units('kpc')
                original_pos = halo['pos'].copy()
                halo['pos'] = correct_pos(halo['pos'], boxsize)

                for r in calcu_field:
                    R = self.prop['R'][i:i + 1][r].in_units('kpc')
                    subgas = halo.gas[pnb.filt.Annulus(R, (thickness + 1) * R)]
                    if additional_filt is None:
                        hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                                pnb.filt.LowPass('nh', nh_cut)
                    else:
                        hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                                pnb.filt.LowPass('nh', nh_cut) & additional_filt
                    hot_diffuse_gas_ = subgas[hot_diffuse_filt]
                    if len(hot_diffuse_gas_) < n_par:
                        self.prop['S'][r][i] = np.nan
                        self.prop['T']['spec' + r][i] = np.nan
                    else:
                        tempTspec = pnb.array.SimArray(cal_tspec(hot_diffuse_gas_, \
                                cal_f=cal_file, datatype=self.datatype), units='keV')
                        if volume_type == 'gas':
                            temp_volume = hot_diffuse_gas_['volume'].sum()
                        elif volume_type == 'full':
                            temp_volume = 4 / 3 * np.pi * ((
                                (thickness + 1) * R)**3 - R**3)
                        elif volume_type == 'full_approx':
                            temp_volume = 4 * np.pi * R**2 * thickness * R
                        else:
                            raise Exception("volume_type is not accepted!")
                        avg_ne = ((hot_diffuse_gas_['ne'] * hot_diffuse_gas_['volume']).sum() \
                                / temp_volume).in_units('cm**-3')
                        avg_nh = ((hot_diffuse_gas_['nh'] * hot_diffuse_gas_['volume']).sum() \
                                / temp_volume).in_units('cm**-3')
                        self.prop['T']['spec' + r][i] = tempTspec
                        self.prop['ne'][r][i] = avg_ne
                        self.prop['nh'][r][i] = avg_nh
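                        # Entropy S = T_spec / n_e^(2/3); the tuple exponent (2, 3)
                        # is pynbody's notation for the rational power 2/3.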
                        self.prop['S'][r][i] = tempTspec / (avg_ne)**(2, 3)

                halo['pos'] = original_pos
            if ((i // 100) != (k // 100)) and self.verbose:
                print('            Calculating entropies... {:7} / {}'\
                            .format(j, list_length), end='\r')
            k = i

    def calcu_metallicity(self, halo_id_list=[], elements=['H', 'O', 'Si', 'Fe'], \
                radii=['500'], temp_cut='5e5 K', nh_cut='0.13 cm**-3', \
                additional_filt=None, weight_types=['mass', 'Lx']):
        '''
        Calculate weighted average mass fractions of the given elements 
        for the hot diffuse gas within the given radii (keys of the 
        radius table), using each entry of weight_types as the weighting.
        '''
        if self.datatype[:5] == 'gizmo':
            self.metal_idx = {'He': 1, 'C': 2, 'N': 3, 'O': 4, \
                'Ne': 5, 'Mg': 6, 'Si': 7, 'S': 8, 'Ca': 9, 'Fe': 10}
        else:
            raise Exception('Currently only support GIZMO.')
        init_zeros = np.zeros(self.length)
        field_names = []
        for ele in elements:
            for rad in radii:
                for weight_type in weight_types:
                    field_names.append('Z_' + ele + rad + weight_type)
        init_prop_table = Table([init_zeros for _ in range(len(field_names))])
        self.prop['metals'] = Table(init_prop_table, names=field_names)
        self.prop['metals'] = pnb.array.SimArray(self.prop['metals'],
                                                 units='cm**-3')
        self.field['metals'] = field_names
        self.field_units['metals'] = 'cm**-3'

        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            if not self._have_group:
                raise Exception(
                    'Must get_group_list (or init_relationship) first!')
            halo_id_list = self.group_list
        if not self._have_radii:
            raise Exception('Must get_radii_masses first!')

        list_length = np.array(list(halo_id_list)).max()
        k = 0
        for j in halo_id_list:
            i = j - 1
            center = self.center[i]
            halo = self.new_catalogue[j]
            tx = pnb.transformation.inverse_translate(halo, center)
            with tx:
                boxsize = halo.properties['boxsize'].in_units('kpc')
                original_pos = halo['pos'].copy()
                halo['pos'] = correct_pos(halo['pos'], boxsize)

                for r in radii:
                    R = self.prop['R'][i:i + 1][r].in_units('kpc')
                    subgas = halo.gas[pnb.filt.Sphere(R)]
                    if additional_filt is None:
                        hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                                pnb.filt.LowPass('nh', nh_cut)
                    else:
                        hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                                pnb.filt.LowPass('nh', nh_cut) & additional_filt
                    igrm = subgas[hot_diffuse_filt]
                    for weight_type in weight_types:
                        weight_sum = igrm[weight_type].sum()

                        for ele in elements:
                            if ele != 'H':
                                # gas_nx = g_p.n_X(igrm['rho'], igrm['metals'][:, self.metal_idx[ele]], ele)
                                totZx = (
                                    igrm['metals'][:, self.metal_idx[ele]] *
                                    igrm[weight_type]).sum()
                            else:
                                # totNx = (igrm['nh'] * igrm[weight_type]).sum()
                                totZx = (igrm['X_H'] * igrm[weight_type]).sum()
                            self.prop['metals']['Z_' + ele + r + weight_type][i] = \
                                            (totZx/weight_sum)
                halo['pos'] = original_pos
            if ((i // 100) != (k // 100)) and self.verbose:
                print('            Calculating metallicities... {:7} / {}'\
                            .format(j, list_length), end='\r')
            k = i

    def savedata(self, filename, field=None, halo_id_list=[], units=None):
        '''
        Save the data in hdf5 format. Will save halo_id_list 
        (key: 'halo_id') and the quantities listed in field.

        Parameters
        -----------
        filename
            Filename of the hdf5 file.
        field
            Type of information to save.
        halo_id_list
            List of halo_ids to save. If set to an empty list, 
            self.group_list is used.
        units
            Convert the data into the specified units before saving.
        '''
        if field is None:
            field = self.field
        if units is None:
            field_units = self.field_units
        else:
            field_units = units
        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            halo_id_list = self.group_list
        with h5py.File(filename, "w") as f:
            dataset = f.create_dataset("halo_id", data=halo_id_list)
            dataset.attrs[
                'Description'] = 'halo_ids of halos saved in this file.'
            dataset2 = f.create_dataset("N_lgal",
                                        data=self.n_lgal[halo_id_list - 1])
            dataset2.attrs['Description'] = 'Number of luminous galaxies'
            for attr in field:
                grp = f.create_group(attr)
                infos = field[attr]
                for info in infos:
                    data_to_save = self.prop[attr][info][halo_id_list - 1]
                    data_to_save.convert_units(field_units[attr])
                    dset = grp.create_dataset(info, data=data_to_save)
                    dset.attrs['units'] = str(data_to_save.units)

    def get_children(self):
        '''
        Generate the list of children (subhalos) for each halo.
        A subhalo can itself have children, and the list does 
        not contain "grandchildren" (i.e., the children 
        of children).
        '''
        self.host_list = []
        self.tophost = np.zeros(self.length).astype(int)
        self.children = [set() for _ in range(self.length)]
        k = 0
        for i in range(self.length):
            j = self.haloid[i]  #j = i + 1
            if ((j // 100) != (k // 100)) and self.verbose:
                print('Generating children list... Halo: {:7} / {}'.format(
                    j, self.length),
                      end='\r')
            k = j
            prop = self.dict[i]
            hostID = prop['hostHalo']
            if j in self.errorlist[0]:
                self.errorlist[1][j] = hostID
                continue
            try:
                if hostID == self.host_id_of_top_level:
                    self.host_list.append(j)
                    self.tophost[i] = j
                else:
                    if hostID < 0:
                        print(
                            'Make sure you\'ve used the correct host ID of the top-level halos!'
                        )
                    host_haloid = self.ID_list.loc[hostID]['halo_id']
                    self.children[host_haloid - 1].add(j)
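                    # Walk up the host chain until reaching the top-level marker;
                    # temphost2 ends up holding the halo_id of the top-level host.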
                    temphost = j
                    while temphost != self.host_id_of_top_level:
                        temphost2 = temphost
                        temphost = self.hostid[temphost - 1]
                    self.tophost[i] = temphost2
            except IndexError:
                self.errorlist[1][j] = hostID
        self._have_children = True

    def get_new_catalogue(self, include_):
        '''
        Generate a new catalogue based on catalogue_original; 
        in the new catalogue each host halo also includes the 
        particles of all its subhalos.
        
        Parameters
        -------------
        include_ : bool
            If True, then will include all the subhalo particles. 
            Otherwise will just be a copy of catalogue_original.
        '''
        if not self._have_children:
            raise Exception('Must get_children first!')
        if include_:
            self.new_catalogue = {}
            k = 0
            for i in range(self.length):
                j = self.haloid[i]
                if ((i // 100) != (k // 100)) and self.verbose:
                    print('Generating new catalogue... Halo: {:7} / {}'.format(
                        j, self.length),
                          end='\r')
                    k = i
                if len(self.children[i]) == 0:
                    self.new_catalogue[j] = self.catalogue_original[j]
                else:
                    union_list = [j] + list(self.children[i])
                    self.new_catalogue[j] = get_union(self.catalogue_original,
                                                      union_list)
        else:
            self.new_catalogue = self.catalogue_original
        self._have_new_catalogue = True

    def get_galaxy(self, g_low_limit, mode='only stellar'):
        '''
        Generate the list of galaxies for each host halo. Sub-subhalos 
        are also included in the host halo's galaxy list, and no list is 
        generated for subhalos even if they contain galaxies.

        Parameters
        -------------
        g_low_limit : pynbody.array.SimArray
            Mass limit above which galaxies are identified as 
            luminous galaxies.
        mode : str
            'only stellar': any halo with non-zero self stellar mass 
            counts as a galaxy. 'include cold gas': the self stellar 
            mass plus star-forming gas mass must exceed g_low_limit.
        '''
        if not self._have_children:
            raise Exception('Must get_children first!')
        if not self._have_new_catalogue:
            raise Exception('Must get_new_catalogue first!')

        self.galaxy_list = []  # List of all galaxies (as long as n_star > 0).
        self.lumi_galaxy_list = [
        ]  # List of all luminous galaxies (self_m_star > galaxy_low_limit).
        self.galaxies = [set() for _ in range(self.length)]
        self.lumi_galaxies = [set() for _ in range(self.length)]
        self.n_lgal = np.zeros(
            self.length
        )  # Number of total luminous galaxies embedded in each host halo.
        # The galaxies within subhalos (i.e., subhalos themselves) will also be taken into account.

        k = 0
        for i in range(self.length):
            j = self.haloid[i]
            if ((i // 100) != (k // 100)) and self.verbose:
                print('Calculating total stellar masses... Halo: {:7} / {}'.
                      format(j, self.length),
                      end='\r')
                k = i
            self.prop['M']['total_star'][i] = self.new_catalogue[j].star[
                'mass'].sum()
            sf_gas = self.new_catalogue[j].gas[pnb.filt.LowPass(
                'temp', '3e4 K')]
            # sf_gas = self.new_catalogue[j].gas[pnb.filt.HighPass('nh', '0.13 cm**-3')]
            self.prop['M']['total_sfgas'][i] = sf_gas['mass'].sum()
            # sf_gas, i.e., star forming gas, is used in the definition of resolved galaxies in Liang's Figure2.
            # But seems that Liang didn't plot Figure 2 using the concept of resolved galaxies.
        low_limit = g_low_limit.in_units(self.prop['M']['total_star'].units)
        k = 0
        for i in range(self.length):
            j = self.haloid[i]
            if ((i // 100) != (k // 100)) and self.verbose:
                print('            Identifying galaxies... Halo: {:7} / {}'.
                      format(j, self.length),
                      end='\r')
                k = i
            children_list = np.array(list(self.children[i]))
            if len(children_list) == 0:
                self_Mstar = self.prop['M']['total_star'][i]
                # if mode == 'include cold gas':
                self_Msfgas = self.prop['M']['total_sfgas'][i]
            else:
                children_union = get_union(self.new_catalogue,
                                           list(children_list))
                children_union_within_ = self.new_catalogue[j].intersect(
                    children_union)
                self_Mstar = self.prop['M']['total_star'][
                    i] - children_union_within_.star['mass'].sum()
                # if mode == 'include cold gas':
                sf_gas_union = children_union_within_.gas[pnb.filt.LowPass(
                    'temp', '3e4 K')]
                # sf_gas_union = children_union.gas[pnb.filt.HighPass('nh', '0.13 cm**-3')]
                self_Msfgas = self.prop['M']['total_sfgas'][i] - sf_gas_union[
                    'mass'].sum()
            self.prop['M']['self_star'][i] = self_Mstar
            self.prop['M']['self_sfgas'][i] = self_Msfgas
            try:
                if mode == 'only stellar':
                    condition = (self_Mstar > 0)
                elif mode == 'include cold gas':
                    condition = (self_Mstar + self_Msfgas > low_limit)
                if condition:
                    self.galaxy_list.append(j)
                    temp_tophost = self.tophost[i]
                    self.galaxies[temp_tophost - 1].add(j)

                    if self_Mstar > low_limit:
                        self.lumi_galaxy_list.append(j)
                        self.n_lgal[temp_tophost - 1] += 1
                        self.lumi_galaxies[temp_tophost - 1].add(j)
            except KeyError:
                self.errorlist[2][j] = self.dict['hostHalo'][i]
        self._have_galaxy = True

    def get_group_list(self, N_galaxy):
        '''
        Build the list of halo_ids of the halos identified as groups in the catalogue.

        Parameters
        -----------
        N_galaxy : int
            Number of luminous galaxies above which host halos 
            are considered as groups.
        '''
        if not self._have_galaxy:
            raise Exception('Must get_galaxy first!')
        self.group_list, = np.where(self.n_lgal >= N_galaxy)
        self.group_list += 1
        self._have_group = True

    def calcu_tx_lx(self, halo_id_list=[], \
                    core_corr_factor=0.15, calcu_field='500', \
                    temp_cut='5e5 K', nh_cut='0.13 cm**-3', additional_filt=None):
        '''
        Calculate X-ray luminosities and emission weighted 
        temperatures listed in temp_field and luminosity_field. 

        Parameters
        -----------
        halo_id_list
            List of halo_ids to calculate temperatures for. 
            If set to an empty list, self.group_list is used.
        core_corr_factor
            Fraction of the radius defining the excised core for 
            core-corrected temperatures. Gas particles within 
            (core_corr_factor*R, R) will be used for the calculation.
        calcu_field
            Radius to calculate temperatures and luminosities 
            within. Must be in radius_field. Default: R_500.
        temp_cut
            Temperature limit above which gas particles are 
            considered as hot.
        nh_cut
            nh limit above which gas particles are considered 
            as star forming.
        additional_filt
            Any additional filter used to constrain the hot diffuse 
            gas we are investigating.
        '''
        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            if not self._have_group:
                raise Exception(
                    'Must get_group_list (or init_relationship) first!')
            halo_id_list = self.group_list
        if not self._have_radii:
            raise Exception('Must get_radii_masses first!')
        if not self._have_new_catalogue:
            raise Exception('Must get_new_catalogue first!')

        list_length = np.array(list(halo_id_list)).max()
        for j in halo_id_list:
            i = j - 1
            if self.verbose:
                print('Calculating temperatures and luminosities... {:7} / {}'\
                            .format(j, list_length), end='\r')
            center = self.center[i]
            halo = self.new_catalogue[j]
            R = self.prop['R'][i:i + 1][calcu_field].in_units('kpc')
            tx = pnb.transformation.inverse_translate(halo, center)
            with tx:
                boxsize = halo.properties['boxsize'].in_units('kpc')
                original_pos = halo['pos'].copy()
                halo['pos'] = correct_pos(halo['pos'], boxsize)

                subsim = halo[pnb.filt.Sphere(R)]
                if additional_filt is None:
                    hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut)
                else:
                    hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut) & additional_filt
                hot_diffuse_gas_ = subsim.gas[hot_diffuse_filt]
                # cal_tweight can return the sum of weight_type at the same time.
                self.prop['T']['x'][i], self.prop['L']['x'][i] = \
                        cal_tweight(hot_diffuse_gas_, weight_type='Lx')
                self.prop['T']['x_cont'][i], self.prop['L']['x_cont'][i] = \
                        cal_tweight(hot_diffuse_gas_, weight_type='Lx_cont')

                # Core-corrected temperatures:
                # Filter:
                corr_hot_ = hot_diffuse_gas_[~pnb.filt.
                                             Sphere(core_corr_factor * R)]

                self.prop['T']['x_corr'][i], _ = cal_tweight(corr_hot_,
                                                             weight_type='Lx')
                self.prop['T']['x_corr_cont'][i], _ = \
                                        cal_tweight(corr_hot_, weight_type='Lx_cont')

                halo['pos'] = original_pos

    def calcu_tspec(self, cal_file, halo_id_list=[], \
                    core_corr_factor=0.15, calcu_field='500', temp_cut='5e5 K', \
                    nh_cut='0.13 cm**-3', additional_filt=None):
        '''
        Calculate spectroscopic temperatures based on Douglas's 
        pytspec module.

        Parameters
        -----------
        cal_file
            Calibration file used for calculating Tspec.
        halo_id_list
            List of halo_ids to calculate temperatures and 
            luminosities for. If set to an empty list, 
            self.group_list is used.
        core_corr_factor
            Fraction of the radius defining the excised core for 
            core-corrected temperatures. Gas particles within 
            (core_corr_factor*R, R) will be used for the calculation.
        calcu_field
            Radius to calculate temperatures and luminosities within. 
            Must be in radius_field. Default: R_500.
        temp_cut
            Temperature limit above which gas particles are 
            considered as hot.
        nh_cut
            nh limit above which gas particles are considered 
            as star forming.
        additional_filt
            Any additional filter used to constrain the hot diffuse 
            gas we are investigating.
        '''
        halo_id_list = np.array(halo_id_list, dtype=int).reshape(-1)
        if len(halo_id_list) == 0:
            if not self._have_group:
                raise Exception(
                    'Must get_group_list (or init_relationship) first!')
            halo_id_list = self.group_list
        if not self._have_radii:
            raise Exception('Must get_radii_masses first!')
        if not self._have_new_catalogue:
            raise Exception('Must get_new_catalogue first!')

        list_length = np.array(list(halo_id_list)).max()
        for j in halo_id_list:
            i = j - 1
            if self.verbose:
                print('Calculating spectroscopic temperatures... {:7} / {}'\
                            .format(j, list_length), end='\r')
            center = self.center[i]
            halo = self.new_catalogue[j]
            R = self.prop['R'][i:i + 1][calcu_field].in_units('kpc')
            tx = pnb.transformation.inverse_translate(halo, center)
            with tx:
                boxsize = halo.properties['boxsize'].in_units('kpc')
                original_pos = halo['pos'].copy()
                halo['pos'] = correct_pos(halo['pos'], boxsize)

                subsim = halo[pnb.filt.Sphere(R)]
                if additional_filt is None:
                    hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut)
                else:
                    hot_diffuse_filt = pnb.filt.HighPass('temp', temp_cut) & \
                            pnb.filt.LowPass('nh', nh_cut) & additional_filt
                hot_diffuse_gas_ = subsim.gas[hot_diffuse_filt]
                self.prop['T']['spec'][i] = pnb.array.SimArray(cal_tspec(hot_diffuse_gas_, \
                                cal_f=cal_file, datatype=self.datatype), units='keV')
                # Core-corrected temperatures:
                # Filter:
                corr_hot_ = hot_diffuse_gas_[~pnb.filt.
                                             Sphere(core_corr_factor * R)]
                self.prop['T']['spec_corr'][i] = pnb.array.SimArray(cal_tspec(corr_hot_, \
                                cal_f=cal_file, datatype=self.datatype), units='keV')

                halo['pos'] = original_pos

    def get_center(self):
        '''
        Calculate the centers of the halos. If an AHF catalogue is 
        provided, the centers are loaded directly from the AHF results. 
        Otherwise the center coordinates are calculated from the 
        gravitational potential or the center of mass.

        Notes
        ------
        Due to a bug in pynbody, calculating the center of mass 
        leads to an incorrect result for halos crossing the 
        periodic boundary of the simulation box. Make sure pynbody 
        has fixed this before relying on it.
        '''
        if self.datatype[-4:] == '_ahf':
            axes = ['Xc', 'Yc', 'Zc']
            tempcen = {}
            for axis in axes:
                tempcen[axis] = np.asarray(self.dict[axis],
                                           dtype=float).reshape(-1, 1)
            self.center = np.concatenate(
                (tempcen['Xc'], tempcen['Yc'], tempcen['Zc']), axis=1)
            self.center = pnb.array.SimArray(
                self.center,
                units='kpc') * self.dict['a'][0] / self.dict['h'][0]
            if self.datatype == 'tipsy_ahf':
                self.center -= self.dict['boxsize'][0].in_units('kpc') / 2
        else:
            self.center = pnb.array.SimArray(np.zeros((self.length, 3)),
                                             units='kpc')
            if 'phi' in self.new_catalogue[1].loadable_keys():
                center_mode = 'pot'
            else:
                center_mode = 'com'
            for i in range(self.length):
                j = self.haloid[i]
                print('Calculating center... {:7} / {}'.format(j, self.length),
                      end='\r')
                self.center[i] = pnb.analysis.halo.center(self.new_catalogue[j], \
                    mode=center_mode, retcen=True, vel=False)
        self._have_center = True
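
# A minimal usage sketch implied by the internal guard checks above. The instance
# `ha`, the signature of get_radii_masses, and all file names / thresholds below
# are assumptions for illustration; they do not appear in this snippet.
ha.get_children()                     # build host/subhalo relations
ha.get_new_catalogue(include_=True)   # merge subhalo particles into their hosts
ha.get_galaxy(pnb.array.SimArray(1e9, 'Msol'), mode='only stellar')
ha.get_group_list(N_galaxy=3)         # hosts with at least 3 luminous galaxies
ha.get_radii_masses()                 # assumed signature; required before calcu_*
ha.get_center()
ha.calcu_tx_lx(calcu_field='500')
ha.calcu_tspec(cal_file='tspec_calibration.fits')   # hypothetical calibration file
ha.savedata('groups.hdf5')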
Beispiel #19
0
def make_todo(input_folder=None,
              cameras=None,
              ccds=None,
              overwrite=False,
              find_secondary_targets=True,
              output_file=None):
    """
	Create the TODO list which is used by the pipeline to keep track of the
	targets that need to be processed.

	Will create the file `todo.sqlite` in the directory.

	Parameters:
		input_folder (string, optional): Input folder to create TODO list for.
			If ``None``, the input directory in the environment variable ``TESSPHOT_INPUT`` is used.
		cameras (iterable of integers, optional): TESS camera number (1-4). If ``None``, all cameras will be included.
		ccds (iterable of integers, optional): TESS CCD number (1-4). If ``None``, all CCDs will be included.
		overwrite (boolean): Overwrite existing TODO file. Default=``False``.
		find_secondary_targets (boolean): Should secondary targets from TPFs be included? Default=True.
		output_file (string, optional): The file path where the output file should be saved.
			If not specified, the file will be saved into the input directory.
			Should only be used for testing, since the file would (probably) otherwise end up with
			a wrong file name for running with the rest of the pipeline.

	Raises:
		NotADirectoryError: If the specified ``input_folder`` is not an existing directory.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""

    logger = logging.getLogger(__name__)

    # Check the input folder, and load the default if not provided:
    if input_folder is None:
        input_folder = os.environ.get(
            'TESSPHOT_INPUT',
            os.path.join(os.path.dirname(__file__), 'tests', 'input'))

    # Check that the given input directory is indeed a directory:
    if not os.path.isdir(input_folder):
        raise NotADirectoryError(
            "The given path does not exist or is not a directory")

    # Make sure cameras and ccds are iterable:
    cameras = (1, 2, 3, 4) if cameras is None else tuple(np.atleast_1d(cameras))
    ccds = (1, 2, 3, 4) if ccds is None else tuple(np.atleast_1d(ccds))

    # The TODO file that we want to create. Delete it if it already exits:
    if output_file is None:
        todo_file = os.path.join(input_folder, 'todo.sqlite')
    else:
        output_file = os.path.abspath(output_file)
        if not output_file.endswith('.sqlite'):
            output_file = output_file + '.sqlite'
        todo_file = output_file

    if os.path.exists(todo_file):
        if overwrite:
            os.remove(todo_file)
        else:
            logger.info("TODO file already exists")
            return

    # Number of threads available for parallel processing:
    threads_max = int(
        os.environ.get('SLURM_CPUS_PER_TASK', multiprocessing.cpu_count()))

    # Load file with targets to be excluded from processing for some reason:
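    # Each row of the exclude file gives (starid, sector, datasource),
    # matching the three columns read below.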
    exclude_file = os.path.join(os.path.dirname(__file__), 'data',
                                'todolist-exclude.dat')
    exclude = np.genfromtxt(exclude_file,
                            usecols=(0, 1, 2),
                            dtype=None,
                            encoding='utf-8')
    exclude = set([tuple(e) for e in exclude])

    # Create the TODO list as a table which we will fill with targets:
    cat = Table(names=('starid', 'sector', 'camera', 'ccd', 'datasource',
                       'tmag', 'cbv_area', 'edge_dist'),
                dtype=('int64', 'int32', 'int32', 'int32', 'S256', 'float32',
                       'int32', 'float32'))

    # Load list of all Target Pixel files in the directory:
    tpf_files = find_tpf_files(input_folder)
    logger.info("Number of TPF files: %d", len(tpf_files))

    if len(tpf_files) > 0:
        # Open a pool of workers:
        logger.info("Starting pool of workers for TPFs...")
        threads = min(
            threads_max, len(tpf_files)
        )  # No reason to use more than the number of jobs in total
        logger.info("Using %d processes.", threads)

        if threads > 1:
            pool = multiprocessing.Pool(threads)
            m = pool.imap_unordered
        else:
            m = map

        # Run the TPF files in parallel:
        tic = default_timer()
        _tpf_todo_wrapper = functools.partial(
            _tpf_todo,
            input_folder=input_folder,
            cameras=cameras,
            ccds=ccds,
            find_secondary_targets=find_secondary_targets,
            exclude=exclude)
        for cat2 in m(_tpf_todo_wrapper, tpf_files):
            cat = vstack([cat, cat2], join_type='exact')

        if threads > 1:
            pool.close()
            pool.join()

        # Amount of time it took to process TPF files:
        toc = default_timer()
        logger.info("Elaspsed time: %f seconds (%f per file)", toc - tic,
                    (toc - tic) / len(tpf_files))

        # Remove secondary TPF targets if they are also the primary target:
        indx_remove = np.zeros(len(cat), dtype='bool')
        cat.add_index('starid')
        for k, row in enumerate(cat):
            if row['datasource'].startswith('tpf:'):
                indx = cat.loc['starid', row['starid']]['datasource'] == 'tpf'
                if np.any(indx):
                    indx_remove[k] = True
        cat.remove_indices('starid')
        logger.info("Removing %d secondary TPF files as they are also primary",
                    np.sum(indx_remove))
        cat = cat[~indx_remove]

    # Find list of all HDF5 files:
    hdf_files = find_hdf5_files(input_folder, camera=cameras, ccd=ccds)
    logger.info("Number of HDF5 files: %d", len(hdf_files))

    if len(hdf_files) > 0:
        # TODO: Could we change this so we don't have to parse the filename?
        inputs = []
        for fname in hdf_files:
            m = re.match(r'sector(\d+)_camera(\d)_ccd(\d)\.hdf5',
                         os.path.basename(fname))
            inputs.append((input_folder, int(m.group(1)), int(m.group(2)),
                           int(m.group(3))))

        # Open a pool of workers:
        logger.info("Starting pool of workers for FFIs...")
        threads = min(threads_max, len(
            inputs))  # No reason to use more than the number of jobs in total
        logger.info("Using %d processes.", threads)

        if threads > 1:
            pool = multiprocessing.Pool(threads)
            m = pool.imap_unordered
        else:
            m = map

        tic = default_timer()
        ccds_done = 0
        for cat2 in m(_ffi_todo_wrapper, inputs):
            cat = vstack([cat, cat2], join_type='exact')
            ccds_done += 1
            logger.info("CCDs done: %d/%d", ccds_done, len(inputs))

        # Amount of time it took to process the HDF5/FFI files:
        toc = default_timer()
        logger.info("Elapsed time: %f seconds (%f per file)", toc - tic,
                    (toc - tic) / len(inputs))

        if threads > 1:
            pool.close()
            pool.join()

    # Check if any targets were found:
    if len(cat) == 0:
        logger.error("No targets found")
        return

    # Remove duplicates!
    logger.info("Removing duplicate entries...")
    _, idx = np.unique(cat[('starid', 'sector', 'camera', 'ccd',
                            'datasource')],
                       return_index=True,
                       axis=0)
    cat = cat[np.sort(idx)]

    # If the target is present in more than one TPF file, pick the one
    # where the target is the furthest from the edge of the image
    # and discard the target in all the other TPFs:
    if find_secondary_targets:
        # Add an index column to the table for later use:
        cat.add_column(Column(name='priority', data=np.arange(len(cat))))

        # Create index that will only find secondary targets:
        indx = [row['datasource'].strip().startswith('tpf:') for row in cat]

        # Group the table on the starids and find groups with more than 1 target:
        # Equivalent to the SQL code "GROUP BY starid HAVING COUNT(*) > 1"
        remove_indx = []
        for g in cat[indx].group_by('starid').groups:
            if len(g) > 1:
                # Find the target farthest from the edge and mark the rest
                # for removal:
                logger.debug(g)
                im = np.argmax(g['edge_dist'])
                ir = np.ones(len(g), dtype='bool')
                ir[im] = False
                remove_indx += list(g[ir]['priority'])

        # Remove the list of duplicate secondary targets:
        logger.info("Removing %d secondary targets as duplicates.",
                    len(remove_indx))
        logger.debug(remove_indx)
        cat.remove_rows(remove_indx)

    # Exclude targets from exclude list:
    # Add an index and use that to search for starid, and then further check sector and datasource:
    cat.add_index('starid')
    remove_indx = []
    for ex in exclude:
        try:
            indx = np.atleast_1d(cat.loc_indices['starid', ex[0]])
        except KeyError:
            indx = []
        for i in indx:
            if cat[i]['sector'] == ex[1] and cat[i]['datasource'] == ex[2]:
                remove_indx.append(i)
    if remove_indx:
        del cat[remove_indx]
    cat.remove_indices('starid')

    # Load file with specific method settings and create lookup-table of them:
    methods_file = os.path.join(os.path.dirname(__file__), 'data',
                                'todolist-methods.dat')
    methods_file = np.genfromtxt(methods_file,
                                 usecols=(0, 1, 2, 3),
                                 dtype=None,
                                 encoding='utf-8')
    methods = {}
    for m in methods_file:
        methods[(m[0], m[1], m[2])] = m[3].strip().lower()

    # Sort the final list:
    cat.sort('tmag')

    # Write the TODO list to the SQLite database file:
    logger.info("Writing TODO file...")
    with contextlib.closing(sqlite3.connect(todo_file)) as conn:
        cursor = conn.cursor()

        # Change settings of SQLite file:
        cursor.execute("PRAGMA page_size=4096;")
        cursor.execute("PRAGMA foreign_keys=ON;")
        cursor.execute("PRAGMA locking_mode=EXCLUSIVE;")
        cursor.execute("PRAGMA journal_mode=TRUNCATE;")

        # Create TODO-list table:
        cursor.execute("""CREATE TABLE todolist (
			priority INTEGER PRIMARY KEY ASC NOT NULL,
			starid BIGINT NOT NULL,
			sector INTEGER NOT NULL,
			datasource TEXT NOT NULL DEFAULT 'ffi',
			camera INTEGER NOT NULL,
			ccd INTEGER NOT NULL,
			method TEXT DEFAULT NULL,
			tmag REAL,
			status INTEGER DEFAULT NULL,
			cbv_area INTEGER NOT NULL
		);""")

        for pri, row in enumerate(cat):
            # Find if there is a specific method defined for this target:
            method = methods.get((int(row['starid']), int(
                row['sector']), row['datasource'].strip()), None)

            # Add target to TODO-list:
            cursor.execute(
                "INSERT INTO todolist (priority,starid,sector,camera,ccd,datasource,tmag,cbv_area,method) VALUES (?,?,?,?,?,?,?,?,?);",
                (pri + 1, int(row['starid']), int(row['sector']),
                 int(row['camera']), int(
                     row['ccd']), row['datasource'].strip(), float(
                         row['tmag']), int(row['cbv_area']), method))

        conn.commit()
        cursor.execute(
            "CREATE INDEX starid_datasource_idx ON todolist (starid, datasource);"
        )  # FIXME: Should be "UNIQUE", but something is weird in ETE-6?!
        cursor.execute("CREATE INDEX status_idx ON todolist (status);")
        cursor.execute("CREATE INDEX starid_idx ON todolist (starid);")
        conn.commit()

        # Run a VACUUM of the table which will force a recreation of the
        # underlying "pages" of the file.
        # Please note that we are changing the "isolation_level" of the connection here,
        # but since we are closing the connection just after, we are not changing it back.
        conn.isolation_level = None
        cursor.execute("VACUUM;")

        # Close connection:
        cursor.close()

    logger.info("TODO done.")
Beispiel #20
0
class IRISSpectrograph(object):
    """An object to hold data from multiple IRIS raster scans."""
    def __init__(self, filenames, spectral_windows="All", common_axis=0):
        """Initializes an IRISSpectrograph object from IRIS level 2 files."""
        # default common axis is 0.
        if type(filenames) is str:
            filenames = [filenames]
        raster_index_to_file = []
        for f, filename in enumerate(filenames):
            hdulist = fits.open(filename)
            if f == 0:
                # collecting the window observations.
                windows_in_obs = np.array([
                    hdulist[0].header["TDESC{0}".format(i)]
                    for i in range(1, hdulist[0].header["NWIN"] + 1)
                ])
                # If spectral_windows is "All", take every window; otherwise take only the requested windows.
                if spectral_windows == "All":
                    spectral_windows_req = windows_in_obs
                    window_fits_indices = range(1, len(hdulist) - 2)
                else:
                    if type(spectral_windows) is str:
                        spectral_windows_req = [spectral_windows]
                    spectral_windows_req = np.asarray(spectral_windows_req,
                                                      dtype="U")
                    window_is_in_obs = np.asarray([
                        window in windows_in_obs
                        for window in spectral_windows_req
                    ])
                    if not all(window_is_in_obs):
                        missing_windows = window_is_in_obs == False
                        raise ValueError(
                            "Spectral windows {0} not in file {1}".format(
                                spectral_windows_req[missing_windows],
                                filenames[0]))
                    window_fits_indices = np.nonzero(
                        np.in1d(windows_in_obs, spectral_windows_req))[0] + 1
                # Create table of spectral window info in OBS.
                self.spectral_windows = Table(
                    [[
                        hdulist[0].header["TDESC{0}".format(i)]
                        for i in window_fits_indices
                    ],
                     [
                         hdulist[0].header["TDET{0}".format(i)]
                         for i in window_fits_indices
                     ],
                     Quantity([
                         hdulist[0].header["TWAVE{0}".format(i)]
                         for i in window_fits_indices
                     ],
                              unit="angstrom"),
                     Quantity([
                         hdulist[0].header["TWMIN{0}".format(i)]
                         for i in window_fits_indices
                     ],
                              unit="angstrom"),
                     Quantity([
                         hdulist[0].header["TWMAX{0}".format(i)]
                         for i in window_fits_indices
                     ],
                              unit="angstrom")],
                    names=("name", "detector type", "brightest wavelength",
                           "min wavelength", "max wavelength"))
                # Set spectral window name as table index.
                self.spectral_windows.add_index("name")
                # Create an empty list for every spectral window; each spectral
                # window name is a key of the dictionary.
                data_dict = dict([
                    (window_name, list())
                    for window_name in self.spectral_windows["name"]
                ])
                auxiliary_header = hdulist[-2].header
            # the unchanged header of the hdulist indexed 0.
            self.meta = hdulist[0].header
            for i, window_name in enumerate(self.spectral_windows["name"]):
                wcs_ = WCS(hdulist[window_fits_indices[i]].header)
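                # Values of -200. flag missing data; replace them with NaN and
                # record them in the mask passed to the Cube.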
                data_nan_masked = copy.deepcopy(
                    hdulist[window_fits_indices[i]].data)
                data_nan_masked[hdulist[window_fits_indices[i]].data ==
                                -200.] = np.nan
                data_mask = hdulist[window_fits_indices[i]].data == -200.
                # appending Cube instance to the corresponding window key in dictionary's list.
                data_dict[window_name].append(
                    Cube(data_nan_masked, wcs_, meta=self.meta,
                         mask=data_mask))

            scan_label = "scan{0}".format(f)
            # Append to list representing the scan labels of each
            # spectrum.
            len_raster_axis = hdulist[1].header["NAXIS3"]
            raster_index_to_file = raster_index_to_file + [scan_label
                                                           ] * len_raster_axis
            # Concatenate auxiliary data arrays from each file.
            try:
                auxiliary_data = np.concatenate(
                    (auxiliary_data, np.array(hdulist[-2].data)), axis=0)
            except UnboundLocalError as e:
                if e.args[
                        0] == "local variable 'auxiliary_data' referenced before assignment":
                    auxiliary_data = np.array(hdulist[-2].data)
                else:
                    raise e
            hdulist.close()

        self.auxiliary_data = Table()
        # Enter certain properties into auxiliary data table as
        # quantities with units.
        auxiliary_colnames = [key for key in auxiliary_header.keys()][7:]
        quantity_colnames = [("TIME", "s"), ("PZTX", "arcsec"),
                             ("PZTY", "arcsec"), ("EXPTIMEF", "s"),
                             ("EXPTIMEN", "s"), ("XCENIX", "arcsec"),
                             ("YCENIX", "arcsec"), ("OBS_VRIX", "m/s")]
        for col in quantity_colnames:
            self.auxiliary_data[col[0]] = _enter_column_into_table_as_quantity(
                col[0], auxiliary_header, auxiliary_colnames, auxiliary_data,
                col[1])
        # Enter remaining properties into the table without units.
        for i, colname in enumerate(auxiliary_colnames):
            self.auxiliary_data[
                colname] = auxiliary_data[:, auxiliary_header[colname]]
        # Reorder columns so they reflect order in data file.
        self.auxiliary_data = self.auxiliary_data[[
            key for key in auxiliary_header.keys()
        ][7:]]
        # Rename some columns to be more user friendly.
        rename_colnames = [("EXPTIMEF", "FUV EXPOSURE TIME"),
                           ("EXPTIMEN", "NUV EXPOSURE TIME")]
        for col in rename_colnames:
            self.auxiliary_data.rename_column(col[0], col[1])
        # Add column designating what scan/file number each spectrum
        # comes from.  This can be used to determine the corresponding
        # wcs object and level 1 info.
        self.auxiliary_data["scan"] = raster_index_to_file
        # Attach dictionary containing level 1 and wcs info for each file used.
        # Calculate measurement time of each spectrum.
        times = np.array([
            parse_time(self.meta["STARTOBS"]) + timedelta(seconds=s)
            for s in self.auxiliary_data["TIME"]
        ])
        # making a CubeSequence of every dictionary key window.
        self.data = dict([(window_name,
                           CubeSequence(data_dict[window_name],
                                        meta=self.meta,
                                        common_axis=common_axis,
                                        time=times))
                          for window_name in self.spectral_windows['name']])

    def __repr__(self):
        spectral_window = self.spectral_windows["name"][0]
        spectral_windows_info = "".join([
            "\n    {0}\n        (raster axis, slit axis, spectral axis) {1}".
            format(name, self.data[name].shape[1])
            for name in self.spectral_windows["name"]
        ])
        return "<iris.IRISSpectrograph instance\nOBS ID: {0}\n".format(self.meta["OBSID"]) + \
               "OBS Description: {0}\n".format(self.meta["OBS_DESC"]) + \
               "OBS period: {0} -- {1}\n".format(self.meta["STARTOBS"], self.meta["ENDOBS"]) + \
               "Instance period: {0} -- {1}\n".format(self.data[spectral_window].time[0],
                                                      self.data[spectral_window].time[-1]) + \
               "Number unique raster positions: {0}\n".format(self.meta["NRASTERP"]) + \
               "Spectral windows{0}>".format(spectral_windows_info)
Beispiel #21
0
class FilterGenerator(object):
    """
    Astronomical filter object generator.
    """
    def __init__(self, path=data_path):
        """
        Parameters
        ----------
        path : str (optional)
            Path to `fft.fits` file
        """
        self.path = path
        self.table = Table(fits.getdata(path))
        self.table.add_index('col0')

    def available_filters(self):
        """
        Return the available filters in the archive
        """
        return self.table['col0'].data

    def reconstruct(self, identifier, model=False):
        """
        Reconstruct an approximate filter transmittance curve for
        a given filter.

        Parameters
        ----------
        identifier : str
            Name of the filter. To see available filters, run
            `~tynt.Filter.available_filters()`
        model : bool
            Construct a composite astropy model which approximates the
            transmittance curve.

        Returns
        -------
        filt : `~tynt.Filter`
            Astronomical filter object.
        """
        filt = list(self.table.loc[identifier])[1:]
        n_lambda, lambda_0, delta_lambda, tr_max = filt[:4]
        fft = filt[4:]
        astropy_model = None

        wavelength = np.arange(lambda_0,
                               (n_lambda + 1) * delta_lambda + lambda_0,
                               delta_lambda)
        N = len(wavelength)

        ifft = np.fft.ifft(fft, n=len(wavelength))

        transmittance = ((ifft.real - ifft.real.min()) * tr_max /
                         ifft.real.ptp())

        if model:
            m = (np.sum([
                models.Sine1D(
                    amplitude=fft[i].real / N, frequency=i / N, phase=1 / 4)
                for i in range(len(fft))
            ]) - np.sum([
                models.Sine1D(amplitude=fft[i].imag / N, frequency=i / N)
                for i in range(len(fft))
            ]))

            @models.custom_model
            def fft_model(x):
                """
                Approximate Fourier reconstruction of an astronomical filter

                Parameters
                ----------
                x : `~np.ndarray`
                    Wavelength in Angstroms.

                Returns
                -------
                transmittance : `~np.ndarray`
                    Transmittance curve
                """
                mo = m(
                    (x - wavelength.min()) / (wavelength[1] - wavelength[0]))
                return (mo - mo.min()) * tr_max / mo.ptp()

            astropy_model = fft_model()

        return Filter(wavelength * u.Angstrom,
                      transmittance,
                      model=astropy_model)

    def download_true_transmittance(self, identifier):
        """
        Query the SVO service for a given filter,
        return the true transmittance curve.

        Parameters
        ----------
        identifier : str
            Name of the filter. To see available filters, run
            `~tynt.Filter.available_filters()`

        Returns
        -------
        filt : `~tynt.Filter`
            Astronomical filter object.
        """
        path = download_file('http://svo2.cab.inta-csic.es/'
                             'theory/fps3/fps.php?ID={0}'.format(identifier))

        true_transmittance = Table.read(path, format='votable')
        return Filter(true_transmittance['Wavelength'].data.data * u.Angstrom,
                      true_transmittance['Transmission'].data.data)
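
# A minimal usage sketch; the filter identifier is a placeholder, and any name
# returned by available_filters() can be used instead.
gen = FilterGenerator()
print(gen.available_filters()[:5])

# Transmittance curve reconstructed from the stored FFT coefficients:
approx_filt = gen.reconstruct('SLOAN/SDSS.r')

# True curve fetched from the SVO Filter Profile Service (requires network access):
true_filt = gen.download_true_transmittance('SLOAN/SDSS.r')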
Beispiel #22
0
def load_tyc_teff():
    """Load the Tycho-2 effective temperatures."""
    print('Loading TYC2 effective temperatures')
    with tarfile.open(os.path.join('vizier', 'tyc2teff.tar.gz'), 'r:gz') as tf:
        # Unfortunately the behaviour of the CDS reader results in high memory
        # usage during loading due to parsing unused fields, so use a custom
        # implementation.
        tyc_range = None
        teff_range = None
        with tf.extractfile('./ReadMe') as readme:
            re_file = re.compile(r'tycall.dat\ +[0-9]+\ +(?P<length>[0-9]+)')
            re_table = re.compile(
                r'Byte-by-byte Description of file: (?P<name>\S+)$')
            re_field = re.compile(
                r'''\ *(?P<start>[0-9]+)\ *-\ *(?P<end>[0-9]+) # range
                                  \ +\S+ # format
                                  \ +\S+ # units
                                  \ +(?P<label>\S+) # label''', re.X)
            record_count = None
            current_table = None
            for bline in readme.readlines():
                line = bline.decode('ascii')
                match = re_file.match(line)
                if match:
                    record_count = int(match.group('length'))
                    continue
                match = re_table.match(line)
                if match:
                    current_table = match.group('name')
                    continue
                if current_table != 'tycall.dat':
                    continue
                match = re_field.match(line)
                if not match:
                    continue
                if match.group('label') == 'Tycho':
                    tyc_range = int(match.group('start')) - 1, int(
                        match.group('end'))
                elif match.group('label') == 'Teff':
                    teff_range = int(match.group('start')) - 1, int(
                        match.group('end'))
                if tyc_range is not None and teff_range is not None:
                    break

        if record_count is None:
            raise RuntimeError('Could not get record count')
        if tyc_range is None or teff_range is None:
            raise RuntimeError('Could not find Tycho, Teff fields')

        with tf.extractfile('./tycall.dat.gz') as gzf, gzip.open(
                gzf, 'rt', encoding='ascii') as f:
            tycs = np.empty(record_count, dtype=np.int64)
            teffs = np.empty(record_count, dtype=np.float64)
            record = 0
            for line in f:
                try:
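                    # Pack the TYC1-TYC2-TYC3 identifier into a single integer:
                    # TYC1 + TYC2*10^4 + TYC3*10^9.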
                    tycsplit = line[tyc_range[0]:tyc_range[1]].split('-')
                    tyc = int(tycsplit[0]) + int(tycsplit[1]) * 10000 + int(
                        tycsplit[2]) * 1000000000
                    teff = float(line[teff_range[0]:teff_range[1]])
                    if teff != 99999:
                        tycs[record] = tyc
                        teffs[record] = teff
                        record += 1
                except ValueError:
                    pass

        data = Table([tycs[0:record], teffs[0:record]],
                     names=('TYC', 'teff_val'))
        data['teff_val'].unit = u.K
        data.add_index('TYC')
        return unique(data, keys=['TYC'])
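
# A minimal usage sketch: the returned table is indexed on the packed TYC
# identifier, using the same packing as the parser above
# (TYC1 + TYC2*10^4 + TYC3*10^9). The identifier below is only an illustration.
teff_table = load_tyc_teff()
tyc1, tyc2, tyc3 = 1, 2, 1
packed_id = tyc1 + tyc2 * 10000 + tyc3 * 1000000000
print(teff_table.loc[packed_id]['teff_val'])   # KeyError if this star has no Teff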
Beispiel #23
0
def mangle(sn, S, spec_mjd, filters, staticfilter=False, anchor_distance=100, verbose=False):
    """

    Mangle (warp) the spectrum S so that its synthetic photometry through the
    given filters matches the fitted light-curve fluxes at spec_mjd.

    :param sn: supernova object; must have ``lcfit`` (light-curve fit) and ``phot`` attributes.
    :param S: spectrum object to be mangled (its flux is rescaled in place).
    :param spec_mjd: MJD of the spectrum, used to evaluate the light-curve splines.
    :param filters: list of filter names to use as mangling knots.
    :param staticfilter: filter name used for the overall scaling; if False, the first filter is used.
    :param anchor_distance: distance in wavelength units beyond the filter edges at which anchor points are placed.
    :param verbose: print diagnostic output.
    :return: dict with the scaled spectrum object, the mangling spline and the data table.
    """

    if hasattr(sn, "lcfit"):

        rows = OrderedDict()
        filter_dict = OrderedDict()

        for i, f in enumerate(filters):
            filter_dict[f] = functions.load_filter(os.path.join(defaults._default_filter_dir_path, f + ".dat"))
            filter_dict[f].calculate_edges()
            #     filter_dict[f].calculate_edges_zero()

            fit_flux = sn.lcfit.spline[f](spec_mjd)

            sn.phot.data_filters[f].resample_response(new_wavelength=S.wavelength)
            S_filter_flux = calc_spectrum_filter_flux(filter_object=sn.phot.data_filters[f], spectrum_object=S, verbose=verbose)
            S_filter_flux_no_area = calc_spectrum_filter_flux(filter_object=sn.phot.data_filters[f],
                                                                  spectrum_object=S,
                                                                  correct_for_area=False, verbose=verbose)
            mS_filter_flux = np.nan

            rows[f] = (fit_flux, S_filter_flux, S_filter_flux_no_area)
            if i == 0:
                data_table = Table(
                    names=("filter", "fitflux", "spec_filterflux", "mangledspec_filterflux", "filter_object", "mask"),
                    dtype=('S12', 'f4', 'f4', 'f4', object, bool))
            data_table.add_row((f, fit_flux, S_filter_flux, mS_filter_flux, filter_dict[f], True))

        for i, f in enumerate(data_table["filter_object"]):
            ## Test extent
            bool_uncontained = np.logical_or(f._lower_edge < S.min_wavelength, f._upper_edge > S.max_wavelength)
            if verbose: print(bool_uncontained)
            if bool_uncontained:
                data_table = data_table[np.where(data_table["filter"] != utils.b(f.filter_name))]

        knot_colours = [j._plot_colour for j in data_table["filter_object"] if hasattr(j, "_plot_colour")]
        data_table.add_column(Column(knot_colours, name="knot_colours"))
        data_table["lambda_eff"] = [i.lambda_effective.value for i in data_table["filter_object"]]

        if not staticfilter:
            w = 0
        else:
            w = np.where(data_table["filter"] == staticfilter)


        scale_factor = 1. / data_table[w]["fitflux"]
        if verbose: print("Scale Factor", scale_factor)
        norm_factor = data_table[w]["fitflux"] / data_table[w]["spec_filterflux"]
        if verbose: print("norm factor", norm_factor)
        data_table["fitflux"] = data_table["fitflux"] * scale_factor
        # "spec flux"
        data_table["spec_filterflux"] = data_table["spec_filterflux"] * scale_factor
        if verbose: print("scaled ", )

        S.flux = S.flux * scale_factor
        S.flux = S.flux * norm_factor
        S.scale_factor = scale_factor
        S.norm_factor = norm_factor

        data_table["spec_filterflux"] = data_table["spec_filterflux"] * norm_factor
        # ## Lower
        anchor_min_wavelength = np.nanmin([i._lower_edge for i in data_table["filter_object"]]) - anchor_distance
        # ## Upper
        anchor_max_wavelength = np.nanmax([i._upper_edge for i in data_table["filter_object"]]) + anchor_distance

        if verbose: print(data_table)
        if len(data_table) < 2:
            S.flux = S.flux / S.scale_factor

            fit_dict = OrderedDict()
            fit_dict["SpectrumObject"] = S
            fit_dict["final_spl"] = lambda x: np.ones(len(x))
            fit_dict["data_table"] = data_table

            return fit_dict

        mc_l, mc_u = functions.calc_linear_terms(data_table[data_table["mask"]], key="fitflux", verbose=verbose)
        anchor_l = mc_l[0] * anchor_min_wavelength + mc_l[1]
        anchor_u = mc_u[0] * anchor_max_wavelength + mc_u[1]


        spl_wav = S.data['wavelength'][
            np.logical_and(S.data['wavelength'] >= anchor_min_wavelength, S.data['wavelength'] <= anchor_max_wavelength)]

        mc_spec_l, mc_spec_u = functions.calc_linear_terms(data_table[data_table["mask"]], key="spec_filterflux", verbose=verbose)
        anchor_spec_l = mc_spec_l[0] * anchor_min_wavelength + mc_spec_l[1]
        anchor_spec_u = mc_spec_u[0] * anchor_max_wavelength + mc_spec_u[1]

        data_table.add_row(("lower_anchor", anchor_spec_l, anchor_spec_l, anchor_spec_u, np.nan, False,
                            colours.hex["batman"], anchor_min_wavelength))
        data_table.add_row(("upper_anchor", anchor_spec_u, anchor_spec_u, anchor_spec_u, np.nan, False,
                            colours.hex["batman"], anchor_max_wavelength))


        data_table.add_index("lambda_eff")
        data_table.sort()

        for i, f in enumerate(data_table["filter_object"]):
            if isinstance(f, classes.FilterClass):
                mangledspec_filterflux = calc_spectrum_filter_flux(filter_object=f, spectrum_object=S, verbose=verbose)
                #         print(data_table["spec_filterflux"][i], mangledspec_filterflux)
                data_table["mangledspec_filterflux"][i] = mangledspec_filterflux
            else:
                pass


        wanted_flux = data_table[data_table["mask"]]["fitflux"].data
        wanted_filters = data_table[data_table["mask"]]["filter_object"].data

        fit_dict = manglespec3(S, spec_mjd, wanted_filters, wanted_flux, data_table)

    else:
        # No light-curve fit available, so there is nothing to mangle against
        fit_dict = OrderedDict()

    return fit_dict
Example #24
def compile_distances(name):
    '''Compare the measured distance to literature values from NED

    '''

    distances = ascii.read(basedir / 'data' / 'literature distances' /
                           f'{name}.csv',
                           delimiter=',',
                           comment='#')
    references = ascii.read(basedir / 'data' / 'literature distances' /
                            'paper_list.csv',
                            encoding='utf8',
                            delimiter=',')

    ref_dict = {
        'Refcode': list(references['Refcode']),
        'Authors': list(references['Authors']),
        'Title': list(references['Title'])
    }

    new = 0
    # search for missing references
    for bibcode in distances['Refcode']:
        if bibcode not in ref_dict['Refcode']:
            # this shouldn't happen often, so we only import astroquery inside this loop
            from astroquery.nasa_ads import ADS
            ADS.TOKEN = open(basedir / 'notebooks' / 'ADS_DEV_KEY', 'r').read()
            try:
                result = ADS.query_simple(bibcode)
                ref_dict['Refcode'].append(bibcode)
                ref_dict['Authors'].append(';'.join(result['author'][0]))
                ref_dict['Title'].append(result['title'][0][0])
                new += 1
            except Exception:
                print(f'cannot find {bibcode} for {name}')

    if new > 0:
        print(f'{new} new items added')
        references = Table(ref_dict)

        references.sort('Refcode', reverse=True)
        with open(basedir / 'data' / 'literature distances' /
                  'paper_list.csv',
                  'w',
                  encoding='utf8',
                  newline='\n') as f:
            ascii.write(references,
                        f,
                        format='csv',
                        overwrite=True,
                        delimiter=',')

    references.add_index('Refcode')
    references['year'] = [int(row['Refcode'][:4]) for row in references]
    references['firstAuthor'] = [
        row['Authors'].split(',')[0] for row in references
    ]
    references['name'] = [
        f'{row["firstAuthor"]}+{str(row["year"])[2:]}' for row in references
    ]

    distances['year'] = [int(row['Refcode'][:4]) for row in distances]
    distances['name'] = [
        references.loc[ref]['name'] for ref in distances['Refcode']
    ]
    base_url = 'https://ui.adsabs.harvard.edu/abs/'
    distances['link'] = [
        f'\href{{{base_url + row["Refcode"]}}}{{{row["name"]}}}'
        for row in distances
    ]

    # Some papers publish more than one distance; the commented-out block below
    # kept only the value with the smallest uncertainty per paper. Instead,
    # distances from the same paper and method are now averaged further down.
    #distances = distances[np.abs(distances['(m-M)']-distance)<1]
    '''
    distances['i'] = np.arange(0,len(distances))
    remove = []
    for i,row in enumerate(distances):
        name = row['name']
        sub = distances[np.where(distances['name']==name)]
        if len(sub) > 1:
            if row['err(m-M)'] > np.min(sub['err(m-M)']):
                remove.append(i)
    
    # only show the 5 most recent results for each method
    for method in np.unique(distances['Method']):
        sub = distances[distances['Method']==method].copy()
        if len(sub)>5:
            sub.sort('year')
            remove += list(sub[:-5]['i'])
    remove = list(set(remove))

    remove.sort(reverse=True)
    for i in remove:
        distances.remove_row(i)
    '''

    methods = []
    year = []
    DM = []
    errDM = []
    links = []
    marker = []
    ref = []
    names = []

    for g in distances.group_by(['Refcode', 'Method']).groups:
        methods.append(Methods.get(g['Method'][0], g['Method'][0]))
        year.append(g['year'][0])
        links.append(g['link'][0])
        DM.append(g['(m-M)'].mean())
        errDM.append(np.sqrt(np.sum(g['err(m-M)']**2)))
        ref.append(g['Refcode'][0])
        names.append(g['name'][0])
        if len(g) == 1:
            marker.append('o')
        else:
            marker.append('D')

    distances = Table([methods, year, DM, errDM, links, marker, ref, names],
                      names=[
                          'Method', 'year', '(m-M)', 'err(m-M)', 'link',
                          'marker', 'Refcode', 'name'
                      ])

    # only show the 5 most recent results for each method
    distances['i'] = np.arange(0, len(distances))
    max_entries = 5
    remove = []
    for method in np.unique(distances['Method']):
        sub = distances[distances['Method'] == method].copy()
        if len(sub) > max_entries:
            sub.sort('year')
            remove += list(sub[:-max_entries]['i'])
    remove = list(set(remove))

    remove.sort(reverse=True)
    for i in remove:
        distances.remove_row(i)

    # distances requires [Method,year,(m-M),err(m-M),link] as columns
    distances['sort_order'] = [
        importance.index(row['Method']) for row in distances
    ]
    distances.sort(['sort_order', 'year'])
    distances['y'] = np.arange(1, len(distances) + 1)

    return distances
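
A minimal sketch of calling the function above (the galaxy name is hypothetical; it only needs a matching CSV under data/literature distances):

# Hypothetical call; relies on compile_distances as defined above.
distances = compile_distances('NGC0628')
print(distances['Method', '(m-M)', 'err(m-M)', 'name'])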
Example #25
lst = []
for name in tqdm(np.unique(nebulae['gal_name']),
                 position=0,
                 leave=False,
                 colour='green'):

    p = {x: sample_table.loc[name][x] for x in sample_table.columns}

    filename = data_ext / 'MUSE_DR2.1' / 'Nebulae catalogue' / 'spectra' / f'{name}_VorSpectra.fits'
    with fits.open(filename) as hdul:
        spectra = Table(hdul[1].data)
        spectral_axis = np.exp(Table(hdul[2].data)['LOGLAM']) * u.Angstrom

    spectra['region_ID'] = np.arange(len(spectra))
    spectra.add_index('region_ID')

    H0 = 67 * u.km / u.s / u.Mpc
    z = (H0 * Distance(distmod=p['(m-M)']) / c.c).decompose()
    lam_HA0 = 6562.8 * u.Angstrom
    lam_HA = (1 + z) * lam_HA0

    sub = nebulae[nebulae['gal_name'] == name][[
        'gal_name', 'region_ID', 'eq_width', 'HA6562_FLUX'
    ]]

    for row in tqdm(sub, position=1, leave=False, colour='red', desc=name):
        try:
            region_ID = row['region_ID']
            flux = spectra.loc[region_ID]['SPEC'] * u.erg / u.s / u.cm**2 / u.AA
            fit = fit_emission_line(spectral_axis, flux, lam_HA)
Example #26
def get_pec_data(image_dir, ref_image='guide_000.new', img_prefix='',
                 observer=None, phase_length=480,
                 skip_solved=True, verbose=False, parallel=False, **kwargs):

    assert observer is not None, "Observer required"

    # Gather all the images
    base_dir = os.getenv('PANDIR', '/var/panoptes')
    target_name, obs_date_start = image_dir.rstrip('/').split('/', 1)
    target_dir = '{}/images/fields/{}'.format(base_dir, image_dir)

    guide_images = glob.glob('{}/guide_*.new'.format(target_dir))
    if len(guide_images) == 0:
        print("No solved guide images found")
        guide_images = glob.glob('{}/guide_*.cr2'.format(target_dir))
    guide_images.sort()

    # WCS Information
    # Solve the guide image if given a CR2
    ref_image = guide_images[-1]
    ref_solve_info = None
    if ref_image.endswith('cr2'):
        if verbose:
            print("Solving guide image")
        ref_solve_info = get_solve_field(ref_image, verbose=verbose)
        if verbose:
            print("Solved guide image info: {}".format(ref_solve_info))
        ref_image = ref_image.replace('cr2', 'new')

    # If no guide image, attempt a solve on similar fits
    # Note: not sure this is needed any more
    if not os.path.exists(ref_image):
        if os.path.exists(ref_image.replace('new', 'fits')):
            ref_solve_info = get_solve_field(ref_image.replace('new', 'fits'))

    if verbose and ref_solve_info:
        print(ref_solve_info)

    assert os.path.exists(ref_image), "Ref image does not exist: {}".format(ref_image)
    ref_header = fits.getheader(ref_image)
    ref_wcs = get_wcsinfo(ref_image)
    # Reference time
    t0 = Time(ref_header.get('DATE-OBS', date_parser.parse(obs_date_start))).datetime
    if verbose:
        print("Reference image: {}".format(ref_image))
        print("Reference time: {}".format(t0))

    # Image sequence
    image_files = glob.glob('{}/{}*.cr2'.format(target_dir, img_prefix))
    image_files.sort()

    if verbose:
        print("Found {} images in sequence".format(len(image_files)))

    img_info = []

    # Solves an individual image in the sequence
    def solver(img):
        if verbose:
            print('*' * 80)
        header_info = {}
        img_wcs_path = img.replace('cr2', 'wcs')
        if not os.path.exists(img_wcs_path):
            if verbose:
                print("No WCS, solving CR2: {}".format(img))

            # Give the guide image RA/Dec as a guess since it should be close
            header_info = get_solve_field(
                img,
                ra=ref_wcs['ra_center'].value,
                dec=ref_wcs['dec_center'].value,
                radius=10,
                verbose=verbose,
                **kwargs
            )

        # Gather all the header information for the image
        if len(header_info) == 0:
            header_info.update(get_wcsinfo(img_wcs_path))
            header_info.update(fits.getheader(img.replace('cr2', 'new')))
            header_info.update(read_exif(img))

        # Lowercase all header names
        hi = dict((k.lower(), v) for k, v in header_info.items())
        del(hi['history'])
        del(hi['comment'])
        if verbose:
            pprint(hi)

        # Add header info to image info
        img_info.append(hi)

    # Solve all of our images
    # Note: Could do this in parallel
    for img in image_files:
        if verbose:
            print("Solving for {}".format(img))
        solver(img)

    # Get the center RA/Dec for all images
    ras = [w['ra_center'].value for w in img_info]
    decs = [w['dec_center'].value for w in img_info]

    # Get the center RA/Dec in arcseconds (used for the per-image offsets below)
    ras_as = [w['ra_center'].to(u.arcsec).value for w in img_info]
    decs_as = [w['dec_center'].to(u.arcsec).value for w in img_info]

    # List of times for sequence
    time_range = [Time(w.get('date-obs', t0)) for w in img_info]

    # Get the hour angle from the observer
    ha = np.array([observer.target_hour_angle(t, SkyCoord(ras[idx], decs[idx], unit='degree')).to(u.degree).value
                   for idx, t in enumerate(time_range)])

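    # wrap hour angles greater than 270 deg into the range (-90, 270]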
    ha[ha > 270] = ha[ha > 270] - 360

    # Get time deltas between each timestamp
    dt = np.diff([t.datetime.timestamp() for t in time_range])
    # Add the offset for initial time
    dt = np.insert(dt, 0, (time_range[0].datetime.timestamp() - t0.timestamp()))
    # Total offset for each image
    t_offset = np.cumsum(dt)

    # Arcsecond difference between each image for RA
    ra_diff = np.diff(ras_as)
    ra_diff = np.insert(ra_diff, 0, 0)

    # Arcsecond difference between each image for Dec
    dec_diff = np.diff(decs_as)
    dec_diff = np.insert(dec_diff, 0, 0)

    # Delta arcsecond
    dra_as = pd.Series(ra_diff)
    ddec_as = pd.Series(dec_diff)

    # Delta arcsecond rate
    dra_as_rate = dra_as / dt
    ddec_as_rate = ddec_as / dt

    # Fill in empty values
    dra_as_rate.fillna(value=0, inplace=True)
    ddec_as_rate.fillna(value=0, inplace=True)

    if verbose:
        print(len(ra_diff))
        print(len(dec_diff))
        print(len(dt))
        print(len(t_offset))
        print(len(ras))
        print(len(decs))

    table = Table({
        'dec': decs,
        'dec_as': ddec_as,
        'dec_as_rate': ddec_as_rate,
        'dt': dt,
        'ha': ha,
        'ra': ras,
        'ra_as': dra_as,
        'ra_as_rate': dra_as_rate,
        'offset': t_offset,
        'time_range': [t.mjd for t in time_range],
    }, meta={
        'name': target_name,
        'obs_date_start': obs_date_start,
    })

    table.add_index('time_range')

    table['ra'].format = '%+3.3f'
    table['ha'].format = '%+3.3f'
    table['dec'].format = '%+3.3f'
    table['dec_as_rate'].format = '%+1.5f'
    table['ra_as_rate'].format = '%+1.5f'
    table['time_range'].format = '%+5.5f'
    table['ra_as'].format = '%+2.3f'
    table['dec_as'].format = '%+3.3f'

    return table
Example #27
def main(args):

    t = time.perf_counter()

    if os.path.isdir(args.output_path):
        os.chdir(args.output_path)
    else:
        os.makedirs(args.output_path, exist_ok=True)

    if args.arm:
        do_red = args.arm.lower() == 'red'
        do_blue = args.arm.lower() == 'blue'
    else:
        do_red = True
        do_blue = True

    red_root = os.path.join(args.root, 'red')
    blue_root = os.path.join(args.root, 'blue')
    qa_dict = {}

    if args.parameter_file:
        blue_user_config_lines = reduction.parse_pypeit_parameter_file(args.parameter_file, 'p200_dbsp_blue')
        red_user_config_lines = reduction.parse_pypeit_parameter_file(args.parameter_file, 'p200_dbsp_red')
    else:
        blue_user_config_lines = []
        red_user_config_lines = []

    if args.debug:
        pypeit.display.display.connect_to_ginga(raise_err=True, allow_new=True)

    if do_red:
        red_files = fix_headers.main(red_root, args.no_interactive, args.no_interactive)
        context = reduction.setup(red_files, args.output_path, 'p200_dbsp_red')
        # optionally use interactive correction
        if not args.no_interactive:
            interactive_correction(context[0])
        pypeit_file_red = reduction.write_setup(context, 'all', 'p200_dbsp_red', red_user_config_lines)[0]

    if do_blue:
        blue_files = fix_headers.main(blue_root, args.no_interactive, args.no_interactive)
        context = reduction.setup(blue_files, args.output_path, 'p200_dbsp_blue')
        if not args.no_interactive:
            interactive_correction(context[0])
        pypeit_file_blue = reduction.write_setup(context, 'all', 'p200_dbsp_blue', blue_user_config_lines)[0]


    plt.switch_backend("agg")
    # TODO: parallelize this. One option is to split the .pypeit files into
    # pieces, run the calibration first, and then parallelize the reduction.
    output_spec1ds_blue = set()
    output_spec1ds_red = set()
    if do_red:
        output_spec1ds_red, output_spec2ds_red = reduction.redux(pypeit_file_red, args.output_path)
        qa_dict = qa.save_2dspecs(qa_dict, output_spec2ds_red, args.output_path, 'p200_dbsp_red')
    if do_blue:
        output_spec1ds_blue, output_spec2ds_blue = reduction.redux(pypeit_file_blue, args.output_path)
        qa_dict = qa.save_2dspecs(qa_dict, output_spec2ds_blue, args.output_path, 'p200_dbsp_blue')

    if do_red or do_blue:
        qa.write_extraction_QA(qa_dict, args.output_path)

    if do_red:
        verification_counter = 0
        red_pypeit_files = reduction.verify_spec1ds(output_spec1ds_red, verification_counter, args.output_path)
        while red_pypeit_files:
            verification_counter += 1

            out_1d, out_2d = reduction.re_redux(red_pypeit_files, args.output_path)
            red_pypeit_files = reduction.verify_spec1ds(out_1d, verification_counter, args.output_path)
            qa_dict = qa.save_2dspecs(qa_dict, out_2d, args.output_path, 'p200_dbsp_red')

            output_spec1ds_red |= out_1d
            output_spec2ds_red |= out_2d
    if do_blue:
        verification_counter = 0
        blue_pypeit_files = reduction.verify_spec1ds(output_spec1ds_blue, verification_counter, args.output_path)
        while blue_pypeit_files:
            verification_counter += 1

            out_1d, out_2d = reduction.re_redux(blue_pypeit_files, args.output_path)
            blue_pypeit_files = reduction.verify_spec1ds(out_1d, verification_counter, args.output_path)
            qa_dict = qa.save_2dspecs(qa_dict, out_2d, args.output_path, 'p200_dbsp_blue')

            output_spec1ds_blue |= out_1d
            output_spec2ds_blue |= out_2d

    # TODO: use a do/while loop to iterate on the manual extraction GUI until user is satisfied
    if args.manual_extraction:
        # wait for user acknowledgement
        input("Ready for manual extraction? If using GNU screen/tmux behind ssh, make sure to check that $DISPLAY is correct.")
        plt.switch_backend("Qt5Agg")

        if do_red:
            red_manual_pypeit_files = reduction.manual_extraction(output_spec2ds_red, pypeit_file_red, args.output_path)
        if do_blue:
            blue_manual_pypeit_files = reduction.manual_extraction(output_spec2ds_blue, pypeit_file_blue, args.output_path)
        if do_red and red_manual_pypeit_files:
            out_1d, out_2d = reduction.re_redux(red_manual_pypeit_files, args.output_path)
            qa.save_2dspecs(qa_dict, out_2d, args.output_path, 'p200_dbsp_red')

            output_spec1ds_red |= out_1d
            output_spec2ds_red |= out_2d
        if do_blue and blue_manual_pypeit_files:
            out_1d, out_2d = reduction.re_redux(blue_manual_pypeit_files, args.output_path)
            qa.save_2dspecs(qa_dict, out_2d, args.output_path, 'p200_dbsp_blue')

            output_spec1ds_blue |= out_1d
            output_spec2ds_blue |= out_2d

    # spec1d_blueNNNN-OBJ_DBSPb_YYYYMMMDDTHHMMSS.SPAT.fits
    fname_len = 72
    # sens_blueNNNN-OBJ_DBSPb_YYYYMMMDDTHHMMSS.SPAT.fits
    sensfunc_len = 70
    # Find standards and make sensitivity functions
    spec1d_table = Table(names=('filename', 'arm', 'object', 'frametype',
                            'airmass', 'mjd', 'sensfunc', 'exptime'),
                         dtype=(f'U{fname_len}', 'U4', 'U20', 'U8',
                            float, float, f'U{sensfunc_len}', float))

    # Ingest spec_1d tables
    spec1ds = output_spec1ds_red | output_spec1ds_blue
    for spec1d in spec1ds:
        path = os.path.join(args.output_path, 'Science', spec1d)
        with fits.open(path) as hdul:
            head0 = hdul[0].header
            head1 = hdul[1].header
            arm = 'red' if 'red' in head0['PYP_SPEC'] else 'blue'
            spec1d_table.add_row((spec1d, arm, head0['TARGET'],
                head1['OBJTYPE'], head0['AIRMASS'],
                head0['MJD'], '', head0['EXPTIME']))
    spec1d_table.add_index('filename')
    spec1d_table.sort(['arm', 'mjd'])

    if do_red:
        for row in spec1d_table[(spec1d_table['arm'] == 'red') & (spec1d_table['frametype'] == 'standard')]:
            sensfunc = fluxing.make_sensfunc(row['filename'], args.output_path, 'p200_dbsp_red', red_user_config_lines)
            if sensfunc == "":
                spec1d_table['frametype'][spec1d_table['filename'] == row['filename']] = 'science'
            else:
                spec1d_table['sensfunc'][spec1d_table['filename'] == row['filename']] = sensfunc
    if do_blue:
        for row in spec1d_table[(spec1d_table['arm'] == 'blue') & (spec1d_table['frametype'] == 'standard')]:
            sensfunc = fluxing.make_sensfunc(row['filename'], args.output_path, 'p200_dbsp_blue', blue_user_config_lines)
            if sensfunc == "":
                spec1d_table['frametype'][spec1d_table['filename'] == row['filename']] = 'science'
            else:
                spec1d_table['sensfunc'][spec1d_table['filename'] == row['filename']] = sensfunc

    if do_red:
        arm = spec1d_table['arm'] == 'red'
        stds = (spec1d_table['frametype'] == 'standard') & arm
        if np.any(stds):
            for row in spec1d_table[arm]:
                if row['frametype'] == 'science':
                    best_sens = spec1d_table[stds]['sensfunc'][np.abs(spec1d_table[stds]['airmass'] - row['airmass']).argmin()]
                elif row['frametype'] == 'standard':
                    if (stds).sum() == 1:
                        best_sens = spec1d_table[stds]['sensfunc'][np.abs(spec1d_table[stds]['airmass'] - row['airmass']).argmin()]
                    else:
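                        # second-closest standard, so a standard frame is not fluxed with its own sensfunc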
                        best_sens = spec1d_table[stds]['sensfunc'][np.abs(spec1d_table[stds]['airmass'] - row['airmass']).argsort()[1]]
                spec1d_table.loc[row['filename']]['sensfunc'] = best_sens
        else:
            for filename in spec1d_table[arm]['filename']:
                spec1d_table.loc[filename]['sensfunc'] = ''
    if do_blue:
        arm = spec1d_table['arm'] == 'blue'
        stds = (spec1d_table['frametype'] == 'standard') & arm
        if np.any(stds):
            for row in spec1d_table[arm]:
                if row['frametype'] == 'science':
                    best_sens = spec1d_table[stds]['sensfunc'][np.abs(spec1d_table[stds]['airmass'] - row['airmass']).argmin()]
                elif row['frametype'] == 'standard':
                    if (stds).sum() == 1:
                        best_sens = spec1d_table[stds]['sensfunc'][np.abs(spec1d_table[stds]['airmass'] - row['airmass']).argmin()]
                    else:
                        best_sens = spec1d_table[stds]['sensfunc'][np.abs(spec1d_table[stds]['airmass'] - row['airmass']).argsort()[1]]
                spec1d_table.loc[row['filename']]['sensfunc'] = best_sens
        else:
            for filename in spec1d_table[arm]['filename']:
                spec1d_table.loc[filename]['sensfunc'] = ''

    # build fluxfile
    if do_red:
        spec1d_to_sensfunc = {row['filename']: row['sensfunc'] for row in spec1d_table if row['arm'] == 'red'}
        red_fluxfile = fluxing.build_fluxfile(spec1d_to_sensfunc, args.output_path, 'p200_dbsp_red', red_user_config_lines)
    if do_blue:
        spec1d_to_sensfunc = {row['filename']: row['sensfunc'] for row in spec1d_table if row['arm'] == 'blue'}
        blue_fluxfile = fluxing.build_fluxfile(spec1d_to_sensfunc, args.output_path, 'p200_dbsp_blue', blue_user_config_lines)

    # flux data
    if do_red:
        fluxing.flux(red_fluxfile, args.output_path)
    if do_blue:
        fluxing.flux(blue_fluxfile, args.output_path)

    # coadd - intelligent coadding of multiple files
    # first make a column "coaddID" that is the same for frames to be coadded
    # TODO: when there are multiple exposures of an object, splice/output all of them
    coaddIDs = []
    if args.null_coadd:
        coaddIDs = range(len(spec1d_table))
    else:
        previous_row : Row = None
        S_PER_DAY = 24 * 60 * 60
        thresh = 15
        for i, row in enumerate(spec1d_table):
            if i == 0:
                coaddIDs.append(0)
            else:
                # if this is the same object as the last one
                # and they were taken consecutively
                if ((row['arm'] == previous_row['arm']) and
                    (row['object'] == previous_row['object']) and
                    ((row['mjd']*S_PER_DAY - previous_row['mjd']*S_PER_DAY
                        - previous_row['exptime']) < previous_row['exptime'])):
                    coaddIDs.append(coaddIDs[-1])
                else:
                    coaddIDs.append(coaddIDs[-1] + 1)
            previous_row = row

    spec1d_table.add_column(coaddIDs, name="coadd_id")

    # figure out where on the detector the likely target is
    spec1d_table.add_column(Column(name="spats", dtype=object, length=len(spec1d_table)))
    spec1d_table.add_column(Column(name="fracpos", dtype=object, length=len(spec1d_table)))
    all_spats = []
    all_fracpos = []
    # for each spec1d file
    for filename in spec1d_table['filename']:
        path = os.path.join(args.output_path, 'Science', filename)
        with fits.open(path) as hdul:
            spats = []
            fracpos = []
            for i in range(1, len(hdul) - 1):
                # grab all of its extensions' spatial positions
                spats.append(int(hdul[i].name.split('-')[0].lstrip('SPAT')))
                fracpos.append(hdul[i].header['SPAT_FRACPOS'])
            spats.sort()
            fracpos.sort()
            all_spats.append(spats)
            all_fracpos.append(fracpos)
            spec1d_table.loc[filename]['spats'] = spats
            spec1d_table.loc[filename]['fracpos'] = fracpos
    # The "coadds" column below needs dtype object to allow for variable-length lists
    spec1d_table.add_column(Column(name="coadds", dtype=object, length=len(spec1d_table)))
    spec1d_table.add_column([False]*len(all_spats), name="processed")

    # coadd
    # iterate over coadd_ids
    coadd_to_spec1d = {}
    for coadd_id in set(coaddIDs):
        subtable = spec1d_table[spec1d_table['coadd_id'] == coadd_id]
        fname_spats = {row['filename']: row['spats'].copy() for row in subtable}
        grouped_spats_list = coadding.group_coadds(fname_spats)
        if all(subtable['arm'] == 'red'):
            coadds = coadding.coadd(grouped_spats_list, args.output_path, 'p200_dbsp_red', red_user_config_lines)
        if all(subtable['arm'] == 'blue'):
            coadds = coadding.coadd(grouped_spats_list, args.output_path, 'p200_dbsp_blue', blue_user_config_lines)
        assert all(subtable['arm'] == 'red') or all(subtable['arm'] == 'blue'),\
            "Something went wrong with coadding..."
        for row in subtable:
            spec1d_table.loc[row['filename']]['coadds'] = coadds
        for i, coadd in enumerate(coadds):
            coadd_to_spec1d[coadd] = list(zip(grouped_spats_list[i]['fnames'], grouped_spats_list[i]['spats']))

    if not args.skip_telluric:
        # telluric correct
        if do_red:
            tellcorr_inputs = []
            tell_coadd_fnames = set()
            for row in spec1d_table[spec1d_table['arm'] == 'red']:
                if isinstance(row['coadds'], list):
                    for obj in row['coadds']:
                        if obj not in tell_coadd_fnames:
                            tmp = (obj, args.output_path, 'p200_dbsp_red', red_user_config_lines)
                            tellcorr_inputs.append(tmp)
                            tell_coadd_fnames.add(obj)
            if args.jobs == 1:
                # do it in series
                for tellcorr_input in tqdm.tqdm(tellcorr_inputs):
                    telluric.telluric_correct(*tellcorr_input)
            else:
                pool = multiprocessing.Pool(args.jobs)
                list(tqdm.tqdm(pool.imap(telluric.picklable_telluric_correct, tellcorr_inputs), total=len(tellcorr_inputs)))
                pool.close()
                pool.join()

            # Maybe do something here to verify that telluric correction succeeded
            # and if so, change the coadd names
            for coadd in tell_coadd_fnames:
                tell = coadd.replace(".fits", "_tellcorr.fits")
                tellpath = os.path.join(args.output_path, 'Science', tell)
                coaddpath = os.path.join(args.output_path, 'Science', coadd)
                # check if tell exists and is newer than coadd
                if os.path.isfile(tellpath) and (os.path.getmtime(tellpath) > os.path.getmtime(coaddpath)):
                    # modify coadd
                    for row in spec1d_table:
                        if coadd in row['coadds']:
                            ix = row['coadds'].index(coadd)
                            spec1d_table.loc[row['filename']]['coadds'][ix] = tell
                    coadd_to_spec1d[tell] = coadd_to_spec1d[coadd]
                    del coadd_to_spec1d[coadd]


    # current splicing - make sure spatial fraction is similar on blue/red
    # TODO: handle multiple observations of same target throughout night with null coadding
    # splice data
    splicing_dict = {}
    blue_mask = spec1d_table['arm'] == 'blue'
    red_mask = spec1d_table['arm'] == 'red'

    os.makedirs(os.path.join(args.output_path, 'spliced'), exist_ok=True)

    def get_std_trace(std_path: str) -> float:
        max_sn = -1
        max_fracpos = -1
        with fits.open(std_path) as hdul:
            # loop through trace hdus
            for hdu in hdul:
                if 'SPAT' not in hdu.name:
                    continue

                # look at s/n
                if 'OPT_COUNTS' in hdu.data.dtype.names:
                    this_sn = np.nanmedian(hdu.data['OPT_COUNTS']/hdu.data['OPT_COUNTS_SIG'])
                elif 'BOX_COUNTS' in hdu.data.dtype.names:
                    this_sn = np.nanmedian(hdu.data['BOX_COUNTS']/hdu.data['BOX_COUNTS_SIG'])
                else:
                    this_sn = -1

                if this_sn > max_sn:
                    max_sn = this_sn
                    max_fracpos = hdu.header['SPAT_FRACPOS']

        if max_fracpos == -1:
            raise Exception(f"Error! No HDUs in {os.path.basename(std_path)} have median S/N > 0.")
        return max_fracpos

    ## Need to find red + blue fracpos for standards
    # Standards are assumed to contain only one star each, so no cross-matching
    # between red and blue traces is attempted.
    fracpos_diff_list = []
    stds = spec1d_table['frametype'] == 'standard'
    if do_red or do_blue:
        FRACPOS_SUM = 1.0
        FRACPOS_TOL = 0.05
        if do_red and do_blue:
            # real matching + splicing
            std_fracpos_sums = []
            if (stds & blue_mask).any() and (stds & red_mask).any():
                for row in spec1d_table[stds]:
                    # find closest mjd frame of other arm
                    if not row['processed']:
                        other_arm = spec1d_table['arm'] != row['arm']
                        corresponding_row = spec1d_table[other_arm & stds][np.abs(spec1d_table[other_arm & stds]['mjd'] - row['mjd']).argmin()]
                        this_path = os.path.join(args.output_path, 'Science', row['filename'])
                        corresponding_path = os.path.join(args.output_path, 'Science', corresponding_row['filename'])
                        std_fracpos_sums.append(get_std_trace(this_path) + get_std_trace(corresponding_path))
                        spec1d_table.loc[row['filename']]['processed'] = True
                        spec1d_table.loc[corresponding_row['filename']]['processed'] = True
                FRACPOS_SUM = np.mean(std_fracpos_sums)
                FRACPOS_TOL = FRACPOS_SUM * .025

        # setup splicing dict
        splicing_dict = {}
        # for each target
        for row in spec1d_table:
            target = row['object']
            arm = row['arm']
            # for each of its fracpos
            for i, fracpos in enumerate(row['fracpos']):
                coadd = row['coadds'][i]
                targ_dict = splicing_dict.get(target)
                # normalize fracpos to red
                if do_red and do_blue and arm == 'blue':
                    fracpos = FRACPOS_SUM - fracpos
                # if it's not in the dict
                if targ_dict is None:
                    # put it in the dict
                    splicing_dict[target] = {fracpos: {
                        arm: {
                            'spec1ds': coadd_to_spec1d[coadd],
                            'coadd': coadd
                        }
                    }}
                # else
                else:
                    close_enough = False
                    # for each existing fracpos
                    for fracpos_existing in list(targ_dict):
                        # if it's close enough
                        fracpos_diff_list.append(abs(fracpos_existing - fracpos))
                        if abs(fracpos_existing - fracpos) < FRACPOS_TOL:
                            # put it in the dict
                            splicing_dict[target][fracpos_existing][arm] = {
                                'spec1ds': coadd_to_spec1d[coadd],
                                'coadd': coadd
                            }
                            close_enough = True
                            break
                    if not close_enough:
                        # If this fracpos isn't close enough to any others
                        splicing_dict[target][fracpos] = {arm: {
                            'spec1ds': coadd_to_spec1d[coadd],
                            'coadd': coadd
                        }}
        # And now, actually splice!
        splicing.splice(splicing_dict, args.splicing_interpolate_gaps, red_root, args.output_path)

    with open("fracpos_data.pickle", "wb") as f:
        pickle.dump((fracpos_diff_list, FRACPOS_SUM), f)

    print('Elapsed time: {0} seconds'.format(time.perf_counter() - t))
Example #28
File: core.py Project: eteq/tynt
class Filter(object):
    """
    Astronomical filter object.
    """
    def __init__(self, path=data_path):
        """
        Parameters
        ----------
        path : str (optional)
            Path to `fft.fits` file
        """
        self.path = path
        self.table = Table(fits.getdata(path))
        self.table.add_index('col0')

    def available_filters(self):
        """
        Return the available filters in the archive
        """
        return self.table['col0'].data

    def reconstruct(self, identifier):
        """
        Reconstruct an approximate filter transmittance curve for
        a given filter.

        Parameters
        ----------
        identifier : str
            Name of the filter. To see available filters, run
            `~tynt.Filter.available_filters()`

        Returns
        -------
        wavelength : `~numpy.ndarray`
            Wavelength array in Angstroms
        transmittance : `~numpy.ndarray`
            Approximate transmittance as a function of wavelength
        """
        filt = list(self.table.loc[identifier])[1:]
        n_lambda, lambda_0, delta_lambda, tr_max = filt[:4]
        fft = filt[4:]

        wavelength = np.arange(lambda_0,
                               (n_lambda + 1) * delta_lambda + lambda_0,
                               delta_lambda)

        ifft = np.fft.ifft(fft, n=len(wavelength))

        transmittance = ((ifft.real - ifft.real.min()) * tr_max /
                         ifft.real.ptp())

        return wavelength, transmittance

    def download_true_transmittance(self, identifier):
        """
        Query the SVO service for a given filter, return the true transmittance
        curve.

        Parameters
        ----------
        identifier : str
            Name of the filter. To see available filters, run
            `~tynt.Filter.available_filters()`

        Returns
        -------
        wavelength : `~numpy.ndarray`
            True wavelength array in Angstroms
        transmittance : `~numpy.ndarray`
            True transmittance as a function of wavelength
        """
        path = download_file('http://svo2.cab.inta-csic.es/'
                             'theory/fps3/fps.php?ID={0}'.format(identifier))

        true_transmittance = Table.read(path, format='votable')
        return (true_transmittance['Wavelength'].data.data,
                true_transmittance['Transmission'].data.data)
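
A minimal usage sketch for the class above (the filter identifier is an assumption; any name returned by available_filters() will do):

# Hypothetical usage of the Filter class defined above.
filt = Filter()                                    # reads the packed FFT table from fft.fits
print(filt.available_filters()[:5])                # inspect the available identifiers
wavelength, transmittance = filt.reconstruct('SLOAN/SDSS.r')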
Example #29
labels, n = ndimage.label(Gdiff*Gmask)
flaremask = (labels==3)

# Create arrays containing the time-series data
Int_Inc = []
tiempos = []
for i in range(20):
    diff = Mfiles[i+1].data-Mfiles[i].data
    Mdiff = Map(np.nan_to_num(np.abs(diff)),Mfiles[i].meta)
    Mdiffrot = Mdiff.rotate(angle=Mdiff.meta['crota2'] * u.deg)
    Int_Inc.append((Mdiffrot.data*flaremask).sum())
    tiempos.append(datetime.datetime.strptime(Mdiffrot.meta["date-obs"],'%Y-%m-%dT%H:%M:%S.%f'))

tbl_meta = {'t_key':'t_value'}
table = Table([tiempos, Int_Inc/np.max(Int_Inc)], names=['time', 'Inclination'], meta=Mfiles[i].meta)
table.add_index('time')
ts_table = ts.TimeSeries(table)

# PLOT
fig, ax = plt.subplots(figsize=(10,4))
ts_table.plot(marker='o',linewidth=3)
ax.axvline(tflare, color="gray", linestyle="--")
ax.text(tflare, np.min(Int_Inc/np.max(Int_Inc)), 'flare peak', fontsize=14,color='gray',rotation=90, rotation_mode='anchor')
ax.tick_params(axis='both',labelsize=14)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.set_xlabel('Time [hour:min]',fontsize=16)
ax.set_ylabel('Normalized B-Inclination diff ',fontsize=14)
ax.set_title(Mfiles[imask].meta['date-obs'],fontsize=24)
plt.legend('', frameon=False)  # suppress the auto-generated legend
fig.savefig('20131108T0422.pdf',dpi=150,bbox_inches='tight')
plt.show()
Example #30
def make_todo(input_folder=None, cameras=None, ccds=None, overwrite=False):
    """
	Create the TODO list which is used by the pipeline to keep track of the
	targets that need to be processed.

	Will create the file `todo.sqlite` in the directory.

	Parameters:
		input_folder (string, optional): Input folder to create TODO list for.
			If ``None``, the input directory in the environment variable ``TESSPHOT_INPUT`` is used.
		cameras (iterable of integers, optional): TESS camera number (1-4). If ``None``, all cameras will be included.
		ccds (iterable of integers, optional): TESS CCD number (1-4). If ``None``, all CCDs will be included.
		overwrite (boolean): Overwrite existing TODO file. Default=``False``.

	Raises:
		IOError: If the specified ``input_folder`` is not an existing directory.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""

    logger = logging.getLogger(__name__)

    # Check the input folder, and load the default if not provided:
    if input_folder is None:
        input_folder = os.environ.get(
            'TESSPHOT_INPUT',
            os.path.join(os.path.dirname(__file__), 'tests', 'input'))

    # Check that the given input directory is indeed a directory:
    if not os.path.isdir(input_folder):
        raise IOError("The given path does not exist or is not a directory")

    # Make sure cameras and ccds are iterable:
    cameras = (1, 2, 3, 4) if cameras is None else tuple(np.atleast_1d(cameras))
    ccds = (1, 2, 3, 4) if ccds is None else tuple(np.atleast_1d(ccds))
    Nccds = len(cameras) * len(ccds)

    # The TODO file that we want to create. Delete it if it already exits:
    todo_file = os.path.join(input_folder, 'todo.sqlite')
    if os.path.exists(todo_file):
        if overwrite:
            os.remove(todo_file)
        else:
            logger.info("TODO file already exists")
            return

    # Create the TODO list as a table which we will fill with targets:
    cat = Table(names=('starid', 'camera', 'ccd', 'datasource', 'tmag',
                       'cbv_area'),
                dtype=('int64', 'int32', 'int32', 'S256', 'float32', 'int32'))

    # Load list of all Target Pixel files in the directory:
    tpf_files = find_tpf_files(input_folder)
    logger.info("Number of TPF files: %d", len(tpf_files))

    if len(tpf_files) > 0:
        # Open a pool of workers:
        logger.info("Starting pool of workers for TPFs...")
        threads = int(
            os.environ.get('SLURM_CPUS_PER_TASK', multiprocessing.cpu_count()))
        threads = min(
            threads, len(tpf_files)
        )  # No reason to use more than the number of jobs in total
        logger.info("Using %d processes.", threads)
        pool = multiprocessing.Pool(threads)

        # Run the TPF files in parallel:
        tic = default_timer()
        _tpf_todo_wrapper = functools.partial(_tpf_todo,
                                              input_folder=input_folder,
                                              cameras=cameras,
                                              ccds=ccds,
                                              find_secondary_targets=False)
        for cat2 in pool.imap_unordered(_tpf_todo_wrapper, tpf_files):
            cat = vstack([cat, cat2], join_type='exact')

        pool.close()
        pool.join()

        # Amount of time it took to process TPF files:
        toc = default_timer()
        logger.info("Elaspsed time: %f seconds (%f per file)", toc - tic,
                    (toc - tic) / len(tpf_files))

        # Remove secondary TPF targets if they are also the primary target:
        indx_remove = np.zeros(len(cat), dtype='bool')
        cat.add_index('starid')
        for k, row in enumerate(cat):
            if row['datasource'].startswith('tpf:'):
                indx = cat.loc['starid', row['starid']]['datasource'] == 'tpf'
                if np.any(indx):
                    indx_remove[k] = True
        cat.remove_indices('starid')
        logger.info("Removing %d secondary TPF files as they are also primary",
                    np.sum(indx_remove))
        cat = cat[~indx_remove]

    # Find all targets in Full Frame Images:
    inputs = itertools.product([input_folder], cameras, ccds)

    # Open a pool of workers:
    logger.info("Starting pool of workers for FFIs...")
    threads = int(
        os.environ.get('SLURM_CPUS_PER_TASK', multiprocessing.cpu_count()))
    threads = min(
        threads,
        Nccds)  # No reason to use more than the number of jobs in total
    logger.info("Using %d processes.", threads)

    pool = multiprocessing.Pool(threads)
    ccds_done = 0
    for cat2 in pool.imap_unordered(_ffi_todo_wrapper, inputs):
        cat = vstack([cat, cat2], join_type='exact')
        ccds_done += 1
        logger.info("CCDs done: %d/%d", ccds_done, Nccds)
    pool.close()
    pool.join()

    # Remove duplicates!
    logger.info("Removing duplicate entries...")
    _, idx = np.unique(cat[('starid', 'camera', 'ccd', 'datasource')],
                       return_index=True,
                       axis=0)
    cat = cat[np.sort(idx)]

    # Sort the final list:
    cat.sort('tmag')

    # TODO: Can we make decisions already now on methods?
    # tmag < 2.5 : Halo photometry
    # tmag < 6.5 : Aperture (saturated)
    # tmag > 6.5 : Aperture (with PSF fallback)

    # Write the TODO list to the SQLite database file:
    logger.info("Writing TODO file...")
    conn = sqlite3.connect(todo_file)
    cursor = conn.cursor()

    cursor.execute("""CREATE TABLE todolist (
		priority BIGINT NOT NULL,
		starid BIGINT NOT NULL,
		datasource TEXT NOT NULL DEFAULT 'ffi',
		camera INT NOT NULL,
		ccd INT NOT NULL,
		method TEXT DEFAULT NULL,
		tmag REAL,
		status INT DEFAULT NULL,
		cbv_area INT NOT NULL
	);""")

    for pri, row in enumerate(cat):
        cursor.execute(
            "INSERT INTO todolist (priority,starid,camera,ccd,datasource,tmag,cbv_area) VALUES (?,?,?,?,?,?,?);",
            (pri + 1, int(row['starid']), int(row['camera']), int(
                row['ccd']), row['datasource'].strip(), float(
                    row['tmag']), int(row['cbv_area'])))

    conn.commit()
    cursor.execute("CREATE UNIQUE INDEX priority_idx ON todolist (priority);")
    cursor.execute(
        "CREATE INDEX starid_datasource_idx ON todolist (starid, datasource);"
    )  # FIXME: Should be "UNIQUE", but something is weird in ETE-6?!
    cursor.execute("CREATE INDEX status_idx ON todolist (status);")
    cursor.execute("CREATE INDEX starid_idx ON todolist (starid);")
    conn.commit()

    # Change settings of SQLite file:
    cursor.execute("PRAGMA page_size=4096;")
    # Run a VACUUM of the table which will force a recreation of the
    # underlying "pages" of the file.
    # Please note that we are changing the "isolation_level" of the connection here,
    # but since we are closing the connection just after, we are not changing it back.
    conn.isolation_level = None
    cursor.execute("VACUUM;")

    # Close connection:
    cursor.close()
    conn.close()
    logger.info("TODO done.")
Example #31
    "reference_doi": [],
    "acknowledgment": [],
    "notes": [],
    "col_units": []
}

for this_metadata_file in _metadata_files:
    mydoc = minidom.parse(this_metadata_file)
    metas = mydoc.getElementsByTagName('meta')

    for elem in metas:
        _column_data[elem.attributes['name'].value].append(
            elem.firstChild.data)

dataset_index = Table(_column_data)
dataset_index.add_index('datafilename')


def load_dataset(filename):
    """Load a dataset.

    Returns
    -------
    result: QTable
    """
    datafile_path = os.path.join(_data_directory, filename)
    result = QTable(ascii.read(datafile_path, format='csv', fast_reader=False))

    # add the meta data
    this_row = dataset_index.loc[filename]
    meta_dict = dict(zip(this_row.colnames, this_row))