Example #1
def test_statistics_gui_full_spectrum(specviz_gui):
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # pull out stats dictionary
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Generate truth comparisons
    spectrum = hub.plot_item._data_item.spectrum
    truth_dict = {
        'mean': spectrum.flux.mean(),
        'median': np.median(spectrum.flux),
        'stddev': spectrum.flux.std(),
        'centroid': centroid(spectrum, region=None),
        'snr': "N/A",
        'fwhm': fwhm(spectrum),
        'ew': equivalent_width(spectrum),
        'total': line_flux(spectrum),
        'maxval': spectrum.flux.max(),
        'minval': spectrum.flux.min()
    }

    # compare!
    assert stats_dict == truth_dict

    workspace.close()
Example #2
def test_statistics_gui_roi_spectrum(specviz_gui):
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # Make a region-of-interest cutout, using the default region that extends
    # 0.3 from the middle in either direction
    specviz_gui.current_workspace.current_plot_window.plot_widget._on_add_linear_region()

    # Simulate cutout for truth data
    spectrum = extract_region(hub.plot_item._data_item.spectrum,
                              SpectralRegion(*hub.selected_region_bounds))

    # pull out stats dictionary
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Generate truth comparisons
    truth_dict = {
        'mean': spectrum.flux.mean(),
        'median': np.median(spectrum.flux),
        'stddev': spectrum.flux.std(),
        'centroid': centroid(spectrum, region=None),
        'snr': "N/A",
        'fwhm': fwhm(spectrum),
        'ew': equivalent_width(spectrum),
        'total': line_flux(spectrum),
        'maxval': spectrum.flux.max(),
        'minval': spectrum.flux.min()
    }

    # compare!
    assert stats_dict == truth_dict

    workspace.close()
Example #3
def test_statistics_gui_roi_spectrum(specviz_gui):
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # Make a region-of-interest cutout, using the default region that extends
    # 0.3 from the middle in either direction
    specviz_gui.current_workspace.current_plot_window.plot_widget._on_add_linear_region()

    # Simulate cutout for truth data
    spectrum = extract_region(hub.plot_item._data_item.spectrum,
                              SpectralRegion(*hub.selected_region_bounds))

    # pull out stats dictionary
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Generate truth comparisons
    truth_dict = {'mean': spectrum.flux.mean(),
                  'median': np.median(spectrum.flux),
                  'stddev': spectrum.flux.std(),
                  'centroid': centroid(spectrum, region=None),
                  'snr': "N/A",
                  'fwhm': fwhm(spectrum),
                  'ew': equivalent_width(spectrum),
                  'total': line_flux(spectrum),
                  'maxval': spectrum.flux.max(),
                  'minval': spectrum.flux.min()}

    # compare!
    assert stats_dict == truth_dict

    workspace.close()
Example #4
def compute_stats(spectrum):
    """
    Compute basic statistics for a spectral region.
    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`
    region: `~specutils.utils.SpectralRegion`
    """
    flux = spectrum.flux
    mean = flux.mean()
    rms = np.sqrt(flux.dot(flux) / len(flux))

    try:
        snr_val = snr(spectrum)
    except Exception as e:
        snr_val = "N/A"

    return {
        'mean': mean,
        'median': np.median(flux),
        'stddev': flux.std(),
        # we may want to adjust this for continuum subtraction
        'centroid': centroid(spectrum, region=None),
        'rms': rms,
        'snr': snr_val,
        'fwhm': fwhm(spectrum),
        'ew': equivalent_width(spectrum),
        'total': line_flux(spectrum),
        'maxval': flux.max(),
        'minval': flux.min()
    }
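
A minimal usage sketch for the compute_stats helper above, not part of the original example: it assumes the specutils analysis functions the snippet relies on (centroid, fwhm, equivalent_width, line_flux, snr) and numpy are already imported at module level, and the synthetic spectrum is purely illustrative.

import numpy as np
import astropy.units as u
from astropy.nddata import StdDevUncertainty
from specutils import Spectrum1D

# Small synthetic spectrum: a Gaussian emission line on a flat continuum.
wave = np.linspace(6540., 6580., 200) * u.AA
flux = (1.0 + 5.0 * np.exp(-0.5 * ((wave.value - 6563.) / 2.0) ** 2)) * u.Jy
spec = Spectrum1D(flux=flux, spectral_axis=wave,
                  uncertainty=StdDevUncertainty(0.1 * np.ones(wave.size) * u.Jy))

stats = compute_stats(spec)        # compute_stats as defined in the example above
print(stats['centroid'], stats['fwhm'], stats['snr'])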
Example #5
def test_statistics_gui_full_spectrum(specviz_gui):
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # pull out stats dictionary
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Generate truth comparisons
    spectrum = hub.plot_item._data_item.spectrum
    truth_dict = {'mean': spectrum.flux.mean(),
                  'median': np.median(spectrum.flux),
                  'stddev': spectrum.flux.std(),
                  'centroid': centroid(spectrum, region=None),
                  'snr': "N/A",
                  'fwhm': fwhm(spectrum),
                  'ew': equivalent_width(spectrum),
                  'total': line_flux(spectrum),
                  'maxval': spectrum.flux.max(),
                  'minval': spectrum.flux.min()}

    # compare!
    assert stats_dict == truth_dict

    workspace.close()
Example #6
def line(spec, wave1, wave2):
    # Find the centroid and derive the guess parameters for the line
    centre = centroid(spec, SpectralRegion(wave1 * u.AA, wave2 * u.AA))
    centre = float(centre / (1. * u.AA))
    FWHM = fwhm(spec)
    FWHM = float(FWHM / (1. * u.AA))
    A = line_flux(spec, SpectralRegion(wave1 * u.AA, wave2 * u.AA))
    a = 1 * u.Unit('J cm-2 s-1 AA-1')
    A = float(A / (1. * u.AA * a))
    # Guess parameters: [centre, amplitude, FWHM]
    return [centre, A, FWHM]
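
A hedged sketch of how the guess parameters returned above might seed a Gaussian line fit with specutils. The wavelength window, the flux unit ('J cm-2 s-1 AA-1', carried over from the snippet), and the use of A as a rough amplitude guess are assumptions, not part of the original code.

import astropy.units as u
from astropy.modeling import models
from specutils.fitting import fit_lines

# `spec` is the same Spectrum1D passed to line(); the 6540-6580 AA window is hypothetical.
centre, A, FWHM = line(spec, 6540., 6580.)

# For a Gaussian, FWHM = 2*sqrt(2*ln 2)*sigma, so divide by ~2.3548 for the stddev guess.
g_init = models.Gaussian1D(amplitude=A * u.Unit('J cm-2 s-1 AA-1'),
                           mean=centre * u.AA,
                           stddev=(FWHM / 2.3548) * u.AA)
g_fit = fit_lines(spec, g_init)    # fitted Gaussian1D model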
Example #7
def compute_stats(spectrum):
    """
    Compute basic statistics for a spectral region.
    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`
    region: `~specutils.utils.SpectralRegion`
    """

    try:
        cent = centroid(spectrum, region=None) # we may want to adjust this for continuum subtraction
    except Exception as e:
        logging.debug(e)
        cent = "Error"

    try:
        snr_val = snr(spectrum)
    except Exception as e:
        logging.debug(e)
        snr_val = "N/A"

    try:
        fwhm_val = fwhm(spectrum)
    except Exception as e:
        logging.debug(e)
        fwhm_val = "Error"

    try:
        ew = equivalent_width(spectrum)
    except Exception as e:
        logging.debug(e)
        ew = "Error"

    try:
        total = line_flux(spectrum)
    except Exception as e:
        logging.debug(e)
        total = "Error"

    return {'mean': spectrum.flux.mean(),
            'median': np.median(spectrum.flux),
            'stddev': spectrum.flux.std(),
            'centroid': cent,
            'snr': snr_val,
            'fwhm': fwhm_val,
            'ew': ew,
            'total': total,
            'maxval': spectrum.flux.max(),
            'minval': spectrum.flux.min()}
Example #8
    def test_to_spectrum1d_compound_bandpass(self):
        from specutils.analysis import line_flux

        box = SpectralElement(Box1D,
                              amplitude=0.5,
                              x_0=5000 * u.AA,
                              width=1 * u.AA)
        bp = box * box
        spec = bp.to_spectrum1d()

        w = bp.waveset
        integrated_thru = bp.integrate()
        assert_quantity_allclose(spec.spectral_axis, w)
        assert_quantity_allclose(spec.flux, bp(w))
        assert_quantity_allclose(integrated_thru, 0.25 * u.AA)
        assert_quantity_allclose(integrated_thru, line_flux(spec))
Example #9
    def test_to_spectrum1d_compound_source(self):
        from specutils.analysis import line_flux

        total_flux = 0.5 * (u.erg / u.s / u.cm / u.cm)
        fwhm = 1 * u.AA
        g1 = SourceSpectrum(GaussianFlux1D,
                            mean=300 * u.nm,
                            fwhm=fwhm,
                            total_flux=total_flux)
        g2 = SourceSpectrum(GaussianFlux1D,
                            mean=400 * u.nm,
                            fwhm=fwhm,
                            total_flux=total_flux)
        sp = g1 + g2
        spec = sp.to_spectrum1d(flux_unit=units.FLAM)
        integrated_flux = sp.integrate(flux_unit=units.FLAM)
        assert_quantity_allclose(integrated_flux,
                                 1 * total_flux.unit,
                                 rtol=0.002)
        assert_quantity_allclose(integrated_flux, line_flux(spec), rtol=1e-5)
Example #10
def line_fit(spec,
             spec_err,
             wave_obj,
             dwave=10. * u.AA,
             dwave_cont=100. * u.AA,
             sigmamax=14. * u.AA):
    '''
    Function to fit a 1D gaussian to a HETDEX spectrum from get_spec.py

    Parameters
    ----------
    spec
        1D spectrum from a row in the table provided by get_spec.py.
        Will assume unit of 10**-17*u.Unit('erg cm-2 s-1 AA-1') if no units
        are provided.
    spec_err
        1D spectral uncertainty from table provided by get_spec.py.
        Will assume unit of 10**-17*u.Unit('erg cm-2 s-1 AA-1') if no units
        are provided.
    wave_obj
        wavelength you want to fit, an astropy quantity
    dwave
        spectral region above and below wave_obj to fit a line, an astropy quantity.
        Default is 10.*u.AA
    dwave_cont
        spectral region to fit continuum. Default is +/- 100.*u.AA
    sigmamax
        Maximum linewidth (this is sigma/stdev of the gaussian fit) to allow
        for a fit. Assumes unit of u.AA if not given

    Returns
    -------
    line_param, sn, chi2, sigma, line_flux_data, line_flux_model,
    line_flux_data_err, g_fit, cont
        Fitted line parameters, signal-to-noise, chi-square of the fit, the
        Gaussian sigma used, the measured and model line fluxes, the line-flux
        uncertainty, the fitted Gaussian model, and the continuum level.
        Returns None if the line fit fails.
    '''

    try:
        spectrum = Spectrum1D(flux=spec,
                              spectral_axis=(2.0 * np.arange(1036) + 3470.) *
                              u.AA,
                              uncertainty=StdDevUncertainty(spec_err),
                              velocity_convention=None)
    except ValueError:
        spectrum = Spectrum1D(
            flux=spec * 10**-17 * u.Unit('erg cm-2 s-1 AA-1'),
            spectral_axis=(2.0 * np.arange(1036) + 3470.) * u.AA,
            uncertainty=StdDevUncertainty(spec_err * 10**-17 *
                                          u.Unit('erg cm-2 s-1 AA-1')),
            velocity_convention=None)

    # measure continuum over 2*dwave_cont wide window first:
    cont_region = SpectralRegion((wave_obj - dwave_cont),
                                 (wave_obj + dwave_cont))
    cont_spectrum = extract_region(spectrum, cont_region)
    cont = np.median(cont_spectrum.flux)

    if np.isnan(cont):
        # set the continuum to zero if it's NaN
        print('Continuum fit is NaN. Setting to 0.0')
        cont = 0.0 * cont_spectrum.unit

    # now get region to fit the continuum subtracted line

    sub_region = SpectralRegion((wave_obj - dwave), (wave_obj + dwave))
    sub_spectrum = extract_region(spectrum, sub_region)

    try:
        line_param = estimate_line_parameters(sub_spectrum - cont,
                                              models.Gaussian1D())
    except Exception:
        return None

    if np.isnan(line_param.amplitude.value):
        print('Line fit yields NaN result. Exiting.')
        return None

    try:
        sigma = np.minimum(line_param.stddev, sigmamax)
    except ValueError:
        sigma = np.minimum(line_param.stddev, sigmamax * u.AA)

    if np.isnan(sigma):
        sigma = sigmamax

    g_init = models.Gaussian1D(amplitude=line_param.amplitude,
                               mean=line_param.mean,
                               stddev=sigma)

    #    lineregion = SpectralRegion((wave_obj-2*sigma), (wave_obj+2*sigma))
    #    cont = fit_generic_continuum(sub_spectrum, exclude_regions=lineregion,
    #                                 model=models.Linear1D(slope=0))

    #r1 = SpectralRegion((wave_obj-dwave), (wave_obj-2*sigma))
    #r2 = SpectralRegion((wave_obj+2*sigma), (wave_obj+dwave))
    #fitcontregion = r1 + r2

    #fit_cont_spectrum = extract_region(sub_spectrum, fitcontregion)
    #cont = np.mean(np.hstack([fit_cont_spectrum[0].flux, fit_cont_spectrum[1].flux]))

    #contspec = cont(sub_spectrum.spectral_axis)

    g_fit = fit_lines(sub_spectrum - cont, g_init)

    x = np.arange(wave_obj.value - dwave.value, wave_obj.value + dwave.value,
                  0.5) * u.AA
    y_fit = g_fit(x)

    line_flux_model = np.sum(y_fit * 0.5 * u.AA)

    chi2 = calc_chi2(sub_spectrum - cont, g_fit)

    sn = np.sum(np.array(sub_spectrum.flux)) / np.sqrt(
        np.sum(sub_spectrum.uncertainty.array**2))

    line_flux_data = line_flux(sub_spectrum - cont).to(u.erg * u.cm**-2 *
                                                       u.s**-1)

    line_flux_data_err = np.sqrt(np.sum(sub_spectrum.uncertainty.array**2))

    #fitted_region = SpectralRegion((line_param.mean - 2*sigma),
    #                               (line_param.mean + 2*sigma))

    #fitted_spectrum = extract_region(spectrum, fitted_region)

    #line_param = estimate_line_parameters(fitted_spectrum, models.Gaussian1D())

    #sn = np.sum(np.array(fitted_spectrum.flux)) / np.sqrt(np.sum(
    #    fitted_spectrum.uncertainty.array**2))

    #line_flux_data = line_flux(fitted_spectrum).to(u.erg * u.cm**-2 * u.s**-1)

    #line_flux_data_err = np.sqrt(np.sum(fitted_spectrum.uncertainty.array**2))

    return line_param, sn, chi2, sigma, line_flux_data, line_flux_model, line_flux_data_err, g_fit, cont
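
A hedged usage sketch for line_fit, not part of the original example: the synthetic flux/error rows and the target wavelength are illustrative, only the 1036-bin wavelength grid (2.0*arange(1036) + 3470 AA) is taken from the function body, and the module's other helpers (e.g. calc_chi2, fit_lines, estimate_line_parameters) are assumed to be importable.

import numpy as np
import astropy.units as u

# Synthetic 1036-bin spectrum with a Gaussian line at 4861 AA; left unitless so the
# function falls back to its assumed 1e-17 erg cm-2 s-1 AA-1 unit.
wave_grid = 2.0 * np.arange(1036) + 3470.
spec = 1.0 + 4.0 * np.exp(-0.5 * ((wave_grid - 4861.) / 3.0) ** 2)
spec_err = np.full(1036, 0.2)

result = line_fit(spec, spec_err, wave_obj=4861. * u.AA)
if result is not None:
    (line_param, sn, chi2, sigma, line_flux_data,
     line_flux_model, line_flux_data_err, g_fit, cont) = result
    print(sn, sigma, line_flux_data)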
Example #11
def Load_Files(file_1, file_2, N_sample, objts, classification=False):
    print('INFO:')
    #hdul = fitsio.FITS(file_1) # Open file 1 -- 'truth_DR12Q.fits'
    #info=hdul.info() # File info
    hdul = fits.open(file_1, mode='denywrite')
    #data=hdul[1].read() # Database of spectra with human-expert classifications
    data = hdul[1].data
    #print('The file {} have {} objects. \n'.format(file_1,data.shape[0]))

    print('INFO:')
    # Reading data from data_dr12.fits. This file holds the spectra from DR12.
    #hdul_2 = fitsio.FITS(file_2) # Open file 2 -- 'data_dr12.fits'
    #info2=hdul_2.info() # File info
    #data2=hdul_2[1].read() # Database of spectra
    #spectra=hdul_2[0].read() # Spectrum of each object
    hdul_2 = fits.open(file_2, mode='denywrite')
    data2 = hdul_2[1].data  # Database of spectra
    spectra = hdul_2[0].data  # Spectrum of each object

    #print('The file {} have {} spectra. \n'.format(file_2,spectra.shape[0]))

    # Subset of PLATE parameters of both data
    data_PLATE_1 = data['PLATE']
    data_PLATE_2 = data2['PLATE']

    # Subset of MJD parameters of both data
    data_MJD_1 = data['MJD']
    data_MJD_2 = data2['MJD']

    # Subset of FIBERID parameters of both data
    data_FIBERID_1 = data['FIBERID']
    data_FIBERID_2 = data2['FIBERID']
    data_ID_1 = data['THING_ID']
    data_ID_2 = data2['TARGETID']

    objts = np.asarray(objts)

    # The column 'CLASS_PERSON' has a class identifier for each spectrum: STAR=1, GALAXY=4, QSO=3 and QSO_BAL=30.
    C_P = data['CLASS_PERSON']  #Class Person column
    STAR = C_P[C_P == 1]  # objects classified as stars
    GALAXY = C_P[C_P == 4]  # objects classified as galaxies
    QSO = C_P[C_P == 3]  # objects classified as QSO (Quasars)
    QSO_BAL = C_P[C_P == 30]  # objects classified as QSO BAL (Quasars with Broad Absorption Lines)
    N_C = C_P[C_P != 30]
    N_C = N_C[N_C != 3]
    N_C = N_C[N_C != 1]
    N_C = N_C[N_C != 4]  # objects with none of the valid class codes

    print('INFO: Available objects:')
    print('-->Star:', STAR.shape[0])
    print('-->Galaxy:', GALAXY.shape[0])
    print('-->QSO:', QSO.shape[0])
    print('-->QSO BAL:', QSO_BAL.shape[0])
    print('-->NN: {}\n'.format(N_C.shape[0]))

    # Create two DataFrames, one for Superset_DR12Q and one for data_dr12, keeping only the matching columns
    data1 = {
        'PLATE': data_PLATE_1,
        'MJD': data_MJD_1,
        'FIBERID': data_FIBERID_1,
        'ID': data_ID_1
    }
    data1 = pd.DataFrame(data=data1)

    data2 = {
        'PLATE': data_PLATE_2,
        'MJD': data_MJD_2,
        'FIBERID': data_FIBERID_2,
        'ID': data_ID_2
    }
    data2 = pd.DataFrame(data=data2)

    # Convert the columns in both sets to strings in order to combine them into a single new ID.
    data1['PLATE'] = data1['PLATE'].astype(str)
    data1['MJD'] = data1['MJD'].astype(str)
    data1['FIBERID'] = data1['FIBERID'].astype(str)
    data1['PM'] = data1['MJD'].str.cat(data1['FIBERID'], sep="-")
    data1['NEWID'] = data1['PLATE'].str.cat(data1['PM'], sep="-")
    data_1 = data1.drop(columns=['PLATE', 'MJD', 'FIBERID', 'ID', 'PM']).values

    data2['PLATE'] = data2['PLATE'].astype(str)
    data2['MJD'] = data2['MJD'].astype(str)
    data2['FIBERID'] = data2['FIBERID'].astype(str)
    data2['PM'] = data2['MJD'].str.cat(data2['FIBERID'], sep="-")
    data2['NEWID'] = data2['PLATE'].str.cat(data2['PM'], sep="-")
    # New set of database 2 with the new IDs
    data_2 = data2.drop(columns=['PLATE', 'MJD', 'FIBERID', 'ID', 'PM']).values

    # Use numpy's intersect1d to find the elements common to both sets, along with their indices.
    data_CO = np.array(np.intersect1d(data_1, data_2, return_indices=True))

    data_CO_objects = data_CO[0]  # The unique new ID of each element present in both sets
    data_CO_ind1 = data_CO[1]  # Indices of the intersected elements in data 1 (Superset_DR12Q.fits)
    data_CO_ind2 = data_CO[2]  # Indices of the intersected elements in data 2 (data_dr12.fits)
    print('INFO:')
    print('Found {} objects with spectra from DR12 \n'.format(
        len(data_CO_objects)))
    indi = {'ind1': data_CO_ind1, 'ind2': data_CO_ind2}
    ind = pd.DataFrame(data=indi, index=data_CO_ind1)

    cp = np.array(data['CLASS_PERSON'], dtype=float)
    z = np.array(data['Z_VI'], dtype=float)
    zc = np.array(data['Z_CONF_PERSON'], dtype=float)
    bal = np.array(data['BAL_FLAG_VI'], dtype=float)
    bi = np.array(data['BI_CIV'], dtype=float)

    d = {
        'CLASS_PERSON': cp,
        'Z_VI': z,
        'Z_CONF_PERSON': zc,
        'BAL_FLAG_VI': bal,
        'BI_CIV': bi
    }
    data_0 = pd.DataFrame(data=d)

    obj = data_0.loc[data_CO_ind1]

    if (classification != True):

        if (objts[0] == 'QSO'):
            qsos = obj.loc[obj['CLASS_PERSON'] == 3]
            qsos = qsos.loc[qsos['Z_CONF_PERSON'] == 3]
            sample_objects = qsos.sample(n=int(N_sample),
                                         weights='CLASS_PERSON',
                                         random_state=5)

            indi = np.array(sample_objects.index)
            indi1 = ind.loc[indi].values

        elif (objts[0] == 'QSO_BAL'):
            qsos_bal = obj.loc[obj['CLASS_PERSON'] == 30]
            qsos_bal = qsos_bal.loc[qsos_bal['Z_CONF_PERSON'] == 3]
            sample_objects = qsos_bal.sample(n=int(N_sample),
                                             weights='CLASS_PERSON',
                                             random_state=5)

            indi = np.array(sample_objects.index)
            indi1 = ind.loc[indi].values

        elif (len(objts) == 2):
            qsos = obj.loc[obj['CLASS_PERSON'] == 3]
            qsos = qsos.loc[qsos['Z_CONF_PERSON'] == 3]

            qsos_bal = obj.loc[obj['CLASS_PERSON'] == 30]
            qsos_bal = qsos_bal.loc[qsos_bal['Z_CONF_PERSON'] == 3]

            sample_qso = qsos.sample(n=int(N_sample / 2),
                                     weights='CLASS_PERSON',
                                     random_state=5)
            sample_qso_bal = qsos_bal.sample(n=int(N_sample / 2),
                                             weights='CLASS_PERSON',
                                             random_state=5)
            sample_objects = pd.concat([sample_qso, sample_qso_bal])

            ind_qso = np.array(sample_qso.index)
            ind_qso_bal = np.array(sample_qso_bal.index)

            indi = np.concatenate((ind_qso, ind_qso_bal), axis=None)
            indi1 = ind.loc[indi].values

        spectra_ = np.zeros((N_sample, 886))

        j = 0
        kernel_size = 5
        flux_threshold = 1.1
        # Number of lines // FWHM of max emission line // EW of max emission line //
        # Spectrum Mean // Spectrum STDV // Spectrum Flux Integral // Spectrum SNR
        parameters = np.zeros((N_sample, 7))
        for i in indi:
            k = indi1[j, 1]
            x = np.linspace(3600, 10500, 443)
            zero_spectrum = spectra[k, :443]
            spectrum = Spectrum1D(flux=zero_spectrum * u.Jy,
                                  spectral_axis=x * u.AA)

            #Continuum fit and gaussian smooth
            g1_fit = fit_generic_continuum(spectrum)
            y_continuum_fitted = g1_fit(x * u.AA)
            spec_nw_2 = spectrum / y_continuum_fitted
            spectrum_smooth = gaussian_smooth(spec_nw_2, kernel_size)

            #Number of lines
            lines_1 = find_lines_derivative(spectrum_smooth,
                                            flux_threshold=flux_threshold)
            l = lines_1[lines_1['line_type'] == 'emission']
            number_lines = l['line_center_index'].shape[0]
            parameters[j, 0] = number_lines

            #FWHM
            parameters[j, 1] = fwhm(spectrum_smooth).value

            #EW
            parameters[j, 2] = equivalent_width(spectrum_smooth).value

            #Spectrum Mean
            parameters[j, 3] = np.mean(spectrum_smooth.flux)

            #Spectrum STDV
            parameters[j, 4] = np.std(spectrum_smooth.flux)

            #Spectrum Flux Integral
            parameters[j, 5] = line_flux(spectrum_smooth).value

            #Spectrum SNR
            parameters[j, 6] = snr_derived(spectrum_smooth).value
            j += 1

        d = {
            'Lines_Number': parameters[:, 0],
            'FHWM': parameters[:, 1],
            'EW': parameters[:, 2],
            'Mean': parameters[:, 3],
            'STDV': parameters[:, 4],
            'Spectrum_Flux': parameters[:, 5],
            'SNR': parameters[:, 6]
        }

        parameters = pd.DataFrame(data=d)
        #X=spectra_.values

        #mean_flx= np.ma.average(X[:,:443],axis=1)
        #ll=(X[:,:443]-mean_flx.reshape(-1,1))**2
        #aveflux=np.ma.average(ll, axis=1)
        #sflux = np.sqrt(aveflux)
        #X = (X[:,:443]-mean_flx.reshape(-1,1))/sflux.reshape(-1,1)

        y = sample_objects['Z_VI']
        y = np.array(y, dtype=float)
        #y_max=np.max(y)
        #y=y/y_max

        return parameters, y

    stars = obj.loc[obj['CLASS_PERSON'] == 1]
    galaxies = obj.loc[obj['CLASS_PERSON'] == 4]
    qsos = obj.loc[obj['CLASS_PERSON'] == 3]
    qsos_bal = obj.loc[obj['CLASS_PERSON'] == 30]

    sample_star = stars.sample(n=int(N_sample / 4),
                               weights='CLASS_PERSON',
                               random_state=5)
    sample_galaxy = galaxies.sample(n=int(N_sample / 4),
                                    weights='CLASS_PERSON',
                                    random_state=5)
    sample_qso = qsos.sample(n=int(N_sample / 4),
                             weights='CLASS_PERSON',
                             random_state=5)
    sample_qso_bal = qsos_bal.sample(n=int(N_sample / 4),
                                     weights='CLASS_PERSON',
                                     random_state=5)

    sample_objects = pd.concat(
        [sample_star, sample_galaxy, sample_qso, sample_qso_bal])

    ind_star = np.array(sample_star.index)
    ind_galaxy = np.array(sample_galaxy.index)
    ind_qso = np.array(sample_qso.index)
    ind_qso_bal = np.array(sample_qso_bal.index)

    indi = np.concatenate((ind_star, ind_galaxy, ind_qso, ind_qso_bal),
                          axis=None)
    indi1 = ind.loc[indi].values

    spectra_ = np.zeros((N_sample, 886))
    j = 0
    for i in indi:
        k = indi1[j, 1]
        spectra_[j, :] = spectra[k, :]
        j = j + 1

    spectra_ = pd.DataFrame(spectra_)
    X = spectra_.values

    #Renormalize spectra

    mean_flx = np.ma.average(X[:, :443], axis=1)
    ll = (X[:, :443] - mean_flx.reshape(-1, 1))**2
    aveflux = np.ma.average(ll, axis=1)
    sflux = np.sqrt(aveflux)
    X = (X[:, :443] - mean_flx.reshape(-1, 1)) / sflux.reshape(-1, 1)

    y = sample_objects['CLASS_PERSON']
    y = y.replace([1, 4, 3, 30], [0, 1, 2, 3]).values
    y = np.array(y, dtype=float)

    return X, y
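
A hedged usage sketch for Load_Files, not part of the original example: the file names come from the comments inside the function body, and the sample sizes and object list are illustrative placeholders.

# Classification mode: returns normalized spectra X and integer class labels y
# (0=STAR, 1=GALAXY, 2=QSO, 3=QSO_BAL).
X, y = Load_Files('truth_DR12Q.fits', 'data_dr12.fits', N_sample=400,
                  objts=['QSO', 'QSO_BAL'], classification=True)

# Regression mode: returns the derived-feature DataFrame and the Z_VI redshifts.
features, z = Load_Files('truth_DR12Q.fits', 'data_dr12.fits', N_sample=200,
                         objts=['QSO', 'QSO_BAL'], classification=False)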