コード例 #1
0
class Zap():
    """
    Master class for zapping data.

    Builds ``self.omit``, a mask reshaped to (Nsubint, Nchan) in which 1/True
    marks a bad (to-be-omitted) profile, either from RMS statistics
    ('chauvenet' / 'DMAD') or from a neural-net prediction ('NN').

    Requires:

    file        -       .FITS (must be PSRFITS v5+ format)

    Optional:

    template    -       ASCII format:       BIN#    Flux           (Required if not doing NN excision)
    method      -       Either 'chauvenet', 'DMAD' or 'NN'
    nn_params   -       Handed to NeuralNet.load_params(root=...)  (Required if doing NN excision)
    verbose     -       Prints more information to the console
    **kwargs    -       Passed on to plot.histogram_and_curves() (statistical methods only)

    Raises:

    ValueError  -       If the file name contains "cal" or "59071", if a
                        statistical method is requested without a template,
                        or if the method is not one of the supported names.
    SystemExit  -       If method is 'NN' and nn_params is None.
    """

    # Methods handled by get_omission_matrix().
    _STATISTICAL_METHODS = ('chauvenet', 'DMAD')

    def __init__(self,
                 file,
                 template=None,
                 method='chauvenet',
                 nn_params=None,
                 verbose=False,
                 **kwargs):
        self.file = file
        # NOTE(review): both checks are plain substring tests on the whole
        # path, so a directory name containing "cal" also trips the first one.
        if "cal" in self.file:
            raise ValueError(f"File {self.file} is not in PSR format.")
        elif "59071" in self.file:
            # NOTE(review): hard-coded skip of one specific identifier; looks
            # like a leftover from a particular reduction run -- confirm.
            raise ValueError("Not doing 59071...")
        if method != 'NN' and template is None:
            raise ValueError(
                f"A template is required for method {method!r}; "
                "only 'NN' excision works without one.")
        self.method = method
        self.verbose = verbose
        self.ar = Archive(file, verbose=False)
        if method != 'NN':
            _, self.template = u.get_data_from_asc(template)
            self.opw = u.get_1D_OPW_mask(self.template, windowsize=128)
            self.omit, self.rms_mu, self.rms_sigma = self.get_omission_matrix(
                **kwargs)
            self._print_mask_summary()
        elif nn_params is not None:
            # One row per (subint, channel) profile, one column per phase bin.
            flat = np.reshape(self.ar.getData(),
                              (self.ar.getNsubint() * self.ar.getNchan(),
                               self.ar.getNbin()))
            scaled = MinMaxScaler().fit_transform(pd.DataFrame(flat))
            # The network is fed the transposed matrix (bins x profiles).
            self.x = np.asarray(scaled).T
            self.nn = NeuralNet(self.x, np.array([[0], [0]]))
            # Presumably the layer widths; the first entry is the bin count.
            self.nn.dims = [self.ar.getNbin(), 512, 10, 13, 8, 6, 6, 4, 4, 1]
            self.nn.threshold = 0.5
            self.nn.load_params(root=nn_params)
            self.omit = self.nn_get_omission()
            # NOTE(review): this changes NumPy's print options process-wide;
            # kept because callers may rely on it.
            np.set_printoptions(threshold=sys.maxsize)
            self._print_mask_summary()
        else:
            # Still a SystemExit as before, but no longer silent / status 0.
            sys.exit("Zap: method='NN' requires nn_params.")

    @staticmethod
    def _mask_percentages(mask):
        """Return (good %, bad %) for a mask whose non-zero entries are bad.

        Counting directly (rather than indexing the output of np.unique)
        keeps this correct when the mask holds only one distinct value.
        An empty mask yields (0.0, 0.0).
        """
        mask = np.asarray(mask)
        total = mask.size
        if total == 0:
            return 0.0, 0.0
        bad = int(np.count_nonzero(mask))
        return 100.0 * (total - bad) / total, 100.0 * bad / total

    def _print_mask_summary(self):
        """Print the good/bad percentages of self.omit to the console."""
        good, bad = self._mask_percentages(self.omit)
        print(f"Good channels: {good:.3f}%")
        print(f"Bad channels: {bad:.3f}%")

    def nn_get_omission(self):
        """Return the neural-net prediction as an integer (Nsubint, Nchan) mask.

        The raw output of self.nn.pred_data(self.x, False) is squeezed,
        rounded to the nearest integer and reshaped.
        """
        # Built-in int replaces the np.int alias, which NumPy 1.24 removed.
        pred = np.around(np.squeeze(self.nn.pred_data(self.x, False)),
                         decimals=0).astype(int)
        return np.reshape(pred, (self.ar.getNsubint(), self.ar.getNchan()))

    def get_omission_matrix(self, **kwargs):
        """Build the rejection mask with the configured statistical method.

        Plots the RMS histogram via plot.histogram_and_curves(), forwarding
        **kwargs to it, then applies physics.chauvenet or physics.DMAD.

        Returns (rej_arr, mu, sigma), where mu and sigma come from
        u.rms_arr_properties().

        Raises ValueError if self.method is not 'chauvenet' or 'DMAD'.
        """
        # Fail before any work is done instead of hitting an unbound
        # rej_arr at the return statement.
        if self.method not in self._STATISTICAL_METHODS:
            raise ValueError(
                f"Unknown method {self.method!r}; expected one of "
                f"{self._STATISTICAL_METHODS}.")

        rms, lin_rms, mu, sigma = u.rms_arr_properties(
            self.ar.getData(), self.opw, 1.0)  # Needs to input 2D array

        # Creates the histogram
        plot.histogram_and_curves(
            lin_rms,
            mean=mu,
            std_dev=sigma,
            bins=(self.ar.getNchan() * self.ar.getNsubint()) // 4,
            x_axis='Root Mean Squared',
            y_axis='Frequency Density',
            title=r'$M={},\ \sigma={}$'.format(mu, sigma),
            **kwargs)

        if self.method == 'chauvenet':
            rej_arr = physics.chauvenet(rms,
                                        median=mu,
                                        std_dev=sigma,
                                        threshold=2.0)
        else:  # 'DMAD'
            # DMAD is applied to lin_rms, so its result is reshaped back
            # to (Nsubint, Nchan).
            rej_arr = physics.DMAD(lin_rms, threshold=3.5)
            rej_arr = np.reshape(rej_arr,
                                 (self.ar.getNsubint(), self.ar.getNchan()))

        if self.verbose:
            print("Rejection criterion created.")

        return rej_arr, mu, sigma

    def plot_mask(self, **kwargs):
        """Show the transposed omission mask as a grey-scale image.

        **kwargs is accepted for interface compatibility but is not used.
        """
        fig = plt.figure(figsize=(7, 7))
        ax = fig.add_subplot(111)
        ax.imshow(self.omit.T,
                  cmap=plt.cm.gray,
                  interpolation='nearest',
                  aspect='auto')
        plt.show()

    def save_training_set(self, val_size=0.2):
        """Write labelled profiles to '.training' and '.validation' files.

        Each output row is one profile followed by its label. The label is
        the inverse of the mask value (mask 1 -> label 0, mask 0 -> label 1),
        so label 0 marks a bad profile. NaN mask entries are skipped.

        The two label classes are split separately with
        train_test_split(test_size=val_size), recombined and shuffled.
        NOTE(review): train_test_split presumably rejects a class that is
        too small to split -- confirm how all-good masks should be handled.

        File names are
        '<name>_<int MJD>_<frontend>_<nbin>.training' / '.validation'.
        """
        n_bin = self.ar.getNbin()
        n_profiles = self.ar.getNchan() * self.ar.getNsubint()
        stem = (f'{self.ar.getName()}_{int(self.ar.getMJD())}_'
                f'{self.ar.getFrontend()}_{n_bin}')
        origin = (f'{self.ar.getName()} taken on {int(self.ar.getMJD())} '
                  f'at {self.ar.getFrontend()}')

        profiles = np.asarray(self.ar.getData()).reshape((n_profiles, n_bin))
        flags = np.asarray(self.omit).reshape(n_profiles)

        # NaN flags cannot be turned into a label, so those profiles are
        # left out.
        if np.issubdtype(flags.dtype, np.floating):
            usable = ~np.isnan(flags)
        else:
            usable = np.ones(n_profiles, dtype=bool)

        # From Chauvenet or DMAD. 1 is bad channel; the stored label is the
        # inverse. Any other value is kept as it is.
        raw = flags[usable].astype(int)
        labels = np.where(raw == 1, 0, np.where(raw == 0, 1, raw))

        # Row width is n_bin + 1 (profile plus label), derived from the
        # archive rather than assumed to be 2049.
        rows = np.column_stack((profiles[usable], labels)).astype(float)
        bad_rows = rows[labels == 0]
        good_rows = rows[labels != 0]

        # Sort into training / validation sets
        train, validation = train_test_split(bad_rows, test_size=val_size)
        good_t, good_v = train_test_split(good_rows, test_size=val_size)
        train = np.append(train, good_t, axis=0)
        validation = np.append(validation, good_v, axis=0)

        np.random.shuffle(train)
        np.random.shuffle(validation)

        outputs = (('training', 'Training', train),
                   ('validation', 'Validation', validation))
        for suffix, title, block in outputs:
            # Open each file once; every value is written as '%1.5f '
            # (note the trailing space) with one profile per line.
            with open(f'{stem}.{suffix}', 'w') as t:
                t.write(f'# {title} set for {origin}\n')
                np.savetxt(t, block, fmt='%1.5f ', delimiter='')

    # Save as ASCII text file
    def save(self, outroot="zap_out", ext='.ascii'):
        """Write one '<subint> <frequency>' line per flagged mask entry.

        Frequencies are read from self.ar.freq[subint][channel]. Entries are
        written in row-major order of the (2D) mask.

        Returns the path of the written file (outroot + ext).
        """
        outfile = outroot + ext
        flagged = np.argwhere(np.asarray(self.omit) == 1)
        with open(outfile, 'w') as f:
            for i, j in flagged:
                i, j = int(i), int(j)
                f.write(str(i) + " " + str(self.ar.freq[i][j]) + "\n")
        return outfile