    def __init__(self, dataPath):
        self.dataPath = dataPath
        self.dataLoader = loader(self.dataPath)
        self.dataLoader.clean_inc_angle()

        self.model = []
        self.train_test_split_val = 0.8
        self.run_weight_name = 'model_weights_1219.hdf5'

    def submission(self):
        print('Generating submission...')
        testLoader = loader('../iceberg_ship_classifier/data_test/test.json')
        self.model.load_weights(self.run_weight_name)
        pred = self.model.predict(testLoader.X_train)

        submission = pd.DataFrame()
        submission['id'] = testLoader.id
        submission['is_iceberg'] = pred.reshape((pred.shape[0]))
        submission.to_csv('sub_vgg_1221.csv', index=False)
    def submission_on_best(self):
        print('Generating submission...')
        testLoader = loader('../iceberg_ship_classifier/data_test/test.json')

        minInd = self.loss.index(np.min(self.loss))
        weight_name = "vgg_1220_weights_run_" + str(minInd) + ".hdf5"
        self.model.load_weights(weight_name)

        pred = self.model.predict(testLoader.X_train)

        submission = pd.DataFrame()
        submission['id'] = testLoader.id
        submission['is_iceberg'] = pred.reshape((pred.shape[0]))
        submission.to_csv('sub_vgg_1220.csv', index=False)
    def submission(self, testpath):
        print('Generating submission...')
        testLoader = loader(testpath)

        if not self.model:
            self.create_model()

        self.model.load_weights(self.run_weight_name)

        pred = self.model.predict_proba(testLoader.X_train)

        submission = pd.DataFrame()
        submission['id'] = testLoader.id
        submission['is_iceberg'] = pred.reshape((pred.shape[0]))
        submission.to_csv('sub_' + self.run_weight_name[:-5] + '.csv',
                          index=False)
    def __init__(self, dataPath):
        self.dataPath = dataPath
        self.dataLoader = loader(self.dataPath)
        self.dataLoader.clean_inc_angle()

        self.model = self.vgg_model()
        self.train_test_split_val = 0.8
        self.run_weight_name = 'vgg_pl_1223.hdf5'
        self.gen = ImageDataGenerator(horizontal_flip=True,
                                      vertical_flip=True,
                                      width_shift_range=0.,
                                      height_shift_range=0.,
                                      channel_shift_range=0,
                                      zoom_range=0.2,
                                      rotation_range=10)

        self.loss = []
    def pseudoLabelTrain(self, test_path):
        trainImg, valImg, trainLabel, valLabel = train_test_split(
            self.dataLoader.X_train, self.dataLoader.labels, train_size=0.8)
        testLoader = loader(test_path)
        generator = self.gen.flow(trainImg, trainLabel)
        es, msave = self.callbacks(wname=self.run_weight_name)

        model = self.vgg_model_no_angle()

        model.fit_generator(generator,
                            epochs=500,
                            steps_per_epoch=24,
                            verbose=1,
                            validation_data=(valImg, valLabel),
                            callbacks=[es])

        predValues = model.predict(testLoader.X_train, verbose=1)

        for i in range(len(predValues)):
            if predValues[i] < 0.05 or predValues[i] > 0.95:
                tmp = np.ndarray((1, 75, 75, 3))
                tmp[:, :, :, :] = testLoader.X_train[i]
                trainImg = np.concatenate((trainImg, tmp))
                trainLabel = np.append(trainLabel, predValues[i] > 0.5)

        model_2 = self.vgg_model_no_angle()
        es, _ = self.callbacks(wname=self.run_weight_name)
        generator_2 = self.gen.flow(trainImg, trainLabel)
        model_2.fit_generator(generator_2,
                              epochs=500,
                              steps_per_epoch=24,
                              verbose=1,
                              validation_data=(valImg, valLabel),
                              callbacks=[es, msave])

        pred = model_2.predict(testLoader.X_train)
        submission = pd.DataFrame()
        submission['id'] = testLoader.id
        submission['is_iceberg'] = pred.reshape((pred.shape[0]))
        submission.to_csv('sub_vgg_pl_1223.csv', index=False)
        return 0
    def pseudoLabelingValidation(self, test_path):
        testLoader = loader(test_path)
        testLoader.median_filter()
        trainLoader = self.dataLoader
        trainLoader.median_filter()
        n_split = 10
        kfold = StratifiedKFold(n_splits=n_split,
                                shuffle=True,
                                random_state=16)
        count = 0
        loss = []

        trainImg, valImg, trainLabel, valLabel = train_test_split(
            trainLoader.X_train, trainLoader.labels, train_size=0.8)

        for train_k, test_k in kfold.split(trainImg, trainLabel):
            print('Run ' + str(count + 1) + ' out of ' + str(n_split))

            run_wname = '/scratch/manjuns/iceberg_ship_classifier/run_%s_weights.hdf5' % count
            print('Run weight name: ' + str(run_wname))

            tImg = trainImg[train_k]
            tLabel = trainLabel[train_k]

            generator = self.gen.flow(tImg, tLabel)
            es, msave = self.callbacks(wname=run_wname)

            model = self.vgg_model_no_angle()

            model.fit_generator(generator,
                                epochs=500,
                                steps_per_epoch=24,
                                verbose=1,
                                validation_data=(valImg, valLabel),
                                callbacks=[es])

            predValues = model.predict(testLoader.X_train)

            print('Fold ' + str(count) +
                  ' training 1 completed. Pseudo-labeling test data...')

            for i in range(len(predValues)):
                if predValues[i] < 0.15 or predValues[i] > 0.85:
                    tmp = np.ndarray((1, 75, 75, 3))
                    tmp[:, :, :, :] = testLoader.X_train[i]
                    tImg = np.concatenate((tImg, tmp))
                    tLabel = np.append(tLabel, predValues[i] > 0.5)

            print('Fold ' + str(count) + ' training 2 commencing...')

            model_2 = self.vgg_model_no_angle()

            es, _ = self.callbacks(wname=self.run_weight_name)
            generator_2 = self.gen.flow(tImg, tLabel)
            model_2.fit_generator(generator_2,
                                  epochs=500,
                                  steps_per_epoch=24,
                                  verbose=1,
                                  validation_data=(valImg, valLabel),
                                  callbacks=[es, msave])

            scores = model_2.evaluate(trainImg[test_k], trainLabel[test_k])
            print(scores)
            loss.append(scores[0])
            count += 1

        for i in range(len(loss)):
            print("Run " + str(i + 1) + ": " + str(loss[i]))
        print("")
        print("Loss Mean: " + str(np.mean(loss)) + " Loss std: " +
              str(np.std(loss)))

        minInd = loss.index(min(loss))
        print('Minimum Weight Index: ' + str(minInd))

        bestRunWeight = '/scratch/manjuns/iceberg_ship_classifier/run_%s_weights.hdf5' % minInd
        bestModel = self.vgg_model_no_angle()
        bestModel.load_weights(bestRunWeight)

        pred = bestModel.predict(testLoader.X_train)
        submission = pd.DataFrame()
        submission['id'] = testLoader.id
        submission['is_iceberg'] = pred.reshape((pred.shape[0]))
        submission.to_csv('sub_vgg_pl_1230.csv', index=False)

        return 0
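
The per-sample loops in pseudoLabelTrain and pseudoLabelingValidation append confidently predicted test images one at a time. The same selection can be written with a NumPy boolean mask; below is a minimal, self-contained sketch using dummy arrays (in the methods, predValues comes from model.predict and the images from testLoader.X_train):

# Self-contained sketch of the confidence-threshold selection above,
# written with a boolean mask instead of a per-sample loop.
# The arrays here are dummy data standing in for the real inputs.
import numpy as np

predValues = np.array([[0.01], [0.50], [0.97], [0.90]])  # dummy predictions, shape (n, 1)
X_test = np.zeros((4, 75, 75, 3))                        # dummy test images
trainImg = np.zeros((2, 75, 75, 3))                      # dummy training images
trainLabel = np.array([0, 1])

conf = predValues.ravel()
mask = (conf < 0.05) | (conf > 0.95)                     # keep only confident predictions
trainImg = np.concatenate((trainImg, X_test[mask]))
trainLabel = np.append(trainLabel, (conf[mask] > 0.5).astype(int))
print(trainImg.shape, trainLabel)                        # (4, 75, 75, 3) [0 1 0 1]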
Example #8
import argparse
from astropy.io import fits
from loadData import loader

parser = argparse.ArgumentParser(description='')
parser.add_argument('-d', '--datafile', required=True, help='The location of the *.raw or *.dat data file.')
parser.add_argument('-f', '--fitsfile', required=True, help='The location of the resulting *.fits fits file.')
parser.add_argument('--pulsarname', required=True, help='The name of the pulsar, as noted in the database, \'pulsardata.txt\'.')
parser.add_argument('--frequencymix', type=float, required=True, help='The mixing frequency set on the receiver in MHz (e.g. 420)')
parser.add_argument('--obstime', required=True, help='The start datetime of the observation, in isot format (e.g. 2018-03-10T14:00:00)')
args = parser.parse_args()

data = loader(args.datafile)
header = fits.Header()
header['SRC_NAME'] = args.pulsarname
header['FREQMIX'] = args.frequencymix
header['DATE-OBS'] = args.obstime
hdu = fits.ImageHDU(data, header)
hdu.writeto(args.fitsfile)
Example #9
    def __init__(self, fn_or_cfg):
        if type(fn_or_cfg) == Config:
            cfg = fn_or_cfg
            filename = cfg.FileName
            fileformat = cfg.FileFormat if 'FileFormat' in cfg else cfg.FileName.rstrip(
                '.gz').split('.')[-1]
        elif type(fn_or_cfg) == str:
            cfg = None
            filename = fn_or_cfg
            fileformat = filename.rstrip('.gz').split('.')[-1]
        self.fileformat = fileformat

        self.chisqperiod = None
        if fileformat == 'fits':
            hdulist = fits.open(filename)
            header = hdulist[1].header
            self.data = hdulist[1].data
            self.psr_name = header['SRC_NAME']
            self.obs_start_isot = header['DATE-OBS']
            self.obs_start = Time(self.obs_start_isot)
            self.mix_freq = header['FREQMIX']
            if len(hdulist) > 2:
                self.chisqperiod = hdulist[2].header['bestp']
        elif fileformat == 'fil':
            obs = Waterfall(filename)
            header = obs.header
            self.psr_name = header[b'source_name'].decode('utf-8')[4:]
            self.obs_start_isot = header[b'tstart']
            self.obs_start = Time(self.obs_start_isot, format='mjd')
            # Tammo-Jan doesn't store the mix frequency
            self.mix_freq = header[b'fch1'] - 21.668359375
            # These files have the frequency axis in descending order and include an extra, unnecessary axis.
            self.data = np.squeeze(obs.data[:, :, -2::-1]).astype(np.uint16)
            # Some files have frequency channels that are entirely zero for some reason. To avoid errors after folding, fill them with artificial data...
            self.data[:, ~np.any(self.data, axis=0)] += 65535
        else:
            assert cfg.ObsMetaData, 'Need to supply the metadata: observation time, mixing frequency and pulsar!'
            self.data = loader(cfg.FileName).astype(np.uint32)

        if cfg is not None and cfg.ObsMetaData:
            # Overwrite with metadata from the Config if present
            self.psr_name = cfg.ObsMetaData.PulsarName
            self.obs_start_isot = cfg.ObsMetaData.ObsDate
            self.obs_start = Time(self.obs_start_isot)
            self.mix_freq = cfg.ObsMetaData.MixFreq

        dt = 64 * 512 / (70e6)
        self.obs_dur = len(self.data) * dt
        self.obs_end = self.obs_start + self.obs_dur * u.s
        self.obs_times = np.arange(len(self.data)) * dt
        self.obs_middle = self.obs_dur * u.s / 2 + self.obs_start

        self.pulsar = Pulsar(pulsarname=self.psr_name,
                             tobs=self.obs_start,
                             chisqperiod=self.chisqperiod)

        self.times = barcen_times(self.pulsar,
                                  len(self.data),
                                  obsstart=self.obs_start)
        self.freq_uncor = calc_central_freqs(self.mix_freq)
        self.freq = barcen_freqs(self.pulsar, self.freq_uncor,
                                 self.obs_middle)  # Probably unnecessary...
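
The 'fits' branch above reads the header cards written by the raw2fits-style scripts in Examples #8 and #10 (SRC_NAME, DATE-OBS, FREQMIX) from hdulist[1]. A minimal round-trip check, with an illustrative file name:

from astropy.io import fits

# Illustrative check that a file produced by the Example #8/#10 script carries
# the header cards the 'fits' branch above expects. The file name is hypothetical.
with fits.open('observation.fits') as hdulist:
    header = hdulist[1].header
    print(header['SRC_NAME'], header['DATE-OBS'], header['FREQMIX'])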
Example #10
#!/usr/bin/env python
# coding: utf-8

import argparse
from astropy.io import fits
from loadData import loader


def raw2fits(data, fitsfile, PulsarName, MixFreq, ObsDate):
    header = fits.Header()
    header['SRC_NAME'] = PulsarName
    header['FREQMIX'] = MixFreq
    header['DATE-OBS'] = ObsDate
    hdu = fits.ImageHDU(data, header)  # Maybe we should use a BinTableHDU instead at some point.
    hdu.writeto(fitsfile)
    
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-d', '--datafile', required=True, help='The location of the *.raw or *.dat data file.')
    parser.add_argument('-f', '--fitsfile', required=True, help='The location of the resulting *.fits fits file.')
    parser.add_argument('--pulsarname', required=True, help='The name of the pulsar, as noted in the database, \'./small-data-files/pulsarcat.csv\'.')
    parser.add_argument('--frequencymix', type=float, required=True, help='The mixing frequency set on the receiver in MHz (e.g. 420)')
    parser.add_argument('--obstime', required=True, help='The start datetime of the observation, in isot format (e.g. 2018-03-10T14:00:00)')
    args = parser.parse_args()
    raw2fits(loader(args.datafile), args.fitsfile, args.pulsarname, args.frequencymix, args.obstime)
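
Besides the command-line entry point, raw2fits can be called directly from Python. A minimal sketch; the file paths and pulsar name are illustrative, and the frequency and datetime follow the examples in the argparse help text above:

from loadData import loader

# Hypothetical programmatic use of raw2fits; arguments mirror the CLI flags above.
data = loader('observation_20180310.raw')         # illustrative input path
raw2fits(data,
         'observation_20180310.fits',             # illustrative output path
         PulsarName='B0329+54',                   # illustrative pulsar name
         MixFreq=420.0,                           # MHz, as in the --frequencymix help
         ObsDate='2018-03-10T14:00:00')           # isot format, as in the --obstime help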