def __init__(self, dataPath):
    """Set up the classifier: load the training data and clean its inc_angle field.

    Parameters
    ----------
    dataPath : str
        Path handed to the project ``loader``.
    """
    self.dataPath = dataPath
    # Model is built lazily elsewhere; start with an empty placeholder.
    self.model = []
    self.train_test_split_val = 0.8
    self.run_weight_name = 'model_weights_1219.hdf5'
    # Load the data last, after the cheap attribute setup.
    self.dataLoader = loader(self.dataPath)
    self.dataLoader.clean_inc_angle()
def submission(self, test_path='../iceberg_ship_classifier/data_test/test.json'):
    """Predict on the test set and write a Kaggle submission CSV.

    Generalized: the test-set location used to be hard-coded; it is now a
    keyword argument whose default is the original path, so existing callers
    are unaffected.

    Parameters
    ----------
    test_path : str, optional
        Location of the test-set JSON file.
    """
    print('Generating submission...')
    testLoader = loader(test_path)
    # Restore the weights saved during training before predicting.
    self.model.load_weights(self.run_weight_name)
    pred = self.model.predict(testLoader.X_train)
    submission = pd.DataFrame()
    submission['id'] = testLoader.id
    # Flatten the (n, 1) prediction column to a 1-D series.
    submission['is_iceberg'] = pred.reshape((pred.shape[0]))
    submission.to_csv('sub_vgg_1221.csv', index=False)
def submission_on_best(self):
    """Write a submission CSV using the fold weights with the lowest recorded loss."""
    print('Generating submission...')
    test_loader = loader('../iceberg_ship_classifier/data_test/test.json')
    # Index of the fold whose validation loss was smallest.
    best_run = self.loss.index(np.min(self.loss))
    self.model.load_weights("vgg_1220_weights_run_" + str(best_run) + ".hdf5")
    preds = self.model.predict(test_loader.X_train)
    out = pd.DataFrame()
    out['id'] = test_loader.id
    out['is_iceberg'] = preds.reshape((preds.shape[0]))
    out.to_csv('sub_vgg_1220.csv', index=False)
def submission(self, testpath):
    """Predict class probabilities on the test set at ``testpath`` and write a CSV.

    The output file name is derived from the weight file name (``.hdf5``
    extension stripped).
    """
    print('Generating submission...')
    test_loader = loader(testpath)
    # Build the network lazily if it has not been created yet.
    if not self.model:
        self.create_model()
    self.model.load_weights(self.run_weight_name)
    probs = self.model.predict_proba(test_loader.X_train)
    frame = pd.DataFrame()
    frame['id'] = test_loader.id
    frame['is_iceberg'] = probs.reshape((probs.shape[0]))
    frame.to_csv('sub_' + self.run_weight_name[:-5] + '.csv', index=False)
def __init__(self, dataPath):
    """Load and clean the training data, build the VGG model and an augmenter.

    Parameters
    ----------
    dataPath : str
        Path handed to the project ``loader``.
    """
    self.dataPath = dataPath
    self.dataLoader = loader(self.dataPath)
    self.dataLoader.clean_inc_angle()
    self.model = self.vgg_model()
    self.train_test_split_val = 0.8
    self.run_weight_name = 'vgg_pl_1223.hdf5'
    # Augmentation: both flips plus mild zoom/rotation; shifts disabled.
    self.gen = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True,
        width_shift_range=0.,
        height_shift_range=0.,
        channel_shift_range=0,
        zoom_range=0.2,
        rotation_range=10,
    )
    # Per-fold validation losses are collected here during training.
    self.loss = []
def pseudoLabelTrain(self, test_path):
    """Two-stage pseudo-label training.

    Stage 1 fits a model on the labelled training split. Stage 2 adds the
    test samples the model is confident about (prediction < 0.05 or > 0.95)
    as pseudo-labelled training data, retrains from scratch, and writes a
    submission CSV.

    Parameters
    ----------
    test_path : str
        Location of the test-set JSON file.

    Returns
    -------
    int
        0 on completion (kept for compatibility with existing callers).
    """
    trainImg, valImg, trainLabel, valLabel = train_test_split(
        self.dataLoader.X_train, self.dataLoader.labels, train_size=0.8)
    testLoader = loader(test_path)

    # Stage 1: train on the labelled data only.
    generator = self.gen.flow(trainImg, trainLabel)
    es, msave = self.callbacks(wname=self.run_weight_name)
    model = self.vgg_model_no_angle()
    model.fit_generator(generator, epochs=500, steps_per_epoch=24, verbose=1,
                        validation_data=(valImg, valLabel), callbacks=[es])

    # Stage 2: pseudo-label confident test predictions.
    predValues = model.predict(testLoader.X_train, verbose=1)
    # FIX: the original appended one sample at a time with np.concatenate
    # inside a loop, which is O(n^2); select with a boolean mask and
    # concatenate once instead (same thresholds, same labels).
    confident = ((predValues < 0.05) | (predValues > 0.95)).ravel()
    trainImg = np.concatenate((trainImg, testLoader.X_train[confident]))
    trainLabel = np.append(trainLabel, predValues[confident] > 0.5)

    model_2 = self.vgg_model_no_angle()
    es, _ = self.callbacks(wname=self.run_weight_name)
    generator_2 = self.gen.flow(trainImg, trainLabel)
    # NOTE: msave from the FIRST callbacks() call checkpoints this second
    # fit, matching the original control flow.
    model_2.fit_generator(generator_2, epochs=500, steps_per_epoch=24, verbose=1,
                          validation_data=(valImg, valLabel), callbacks=[es, msave])

    pred = model_2.predict(testLoader.X_train)
    submission = pd.DataFrame()
    submission['id'] = testLoader.id
    submission['is_iceberg'] = pred.reshape((pred.shape[0]))
    submission.to_csv('sub_vgg_pl_1223.csv', index=False)
    return 0
def pseudoLabelingValidation(self, test_path):
    """K-fold cross-validation of the two-stage pseudo-labelling scheme.

    For each of 10 stratified folds: train on the fold's labelled data, add
    confidently predicted test samples (prediction < 0.15 or > 0.85) as
    pseudo-labels, retrain, and record the held-out fold loss. Finally the
    best fold's weights are reloaded to produce a submission CSV.

    Parameters
    ----------
    test_path : str
        Location of the test-set JSON file.

    Returns
    -------
    int
        0 on completion (kept for compatibility with existing callers).
    """
    testLoader = loader(test_path)
    testLoader.median_filter()
    trainLoader = self.dataLoader
    trainLoader.median_filter()

    n_split = 10
    kfold = StratifiedKFold(n_splits=n_split, shuffle=True, random_state=16)
    count = 0
    loss = []
    # A fixed hold-out validation split shared by every fold.
    trainImg, valImg, trainLabel, valLabel = train_test_split(
        trainLoader.X_train, trainLoader.labels, train_size=0.8)

    for train_k, test_k in kfold.split(trainImg, trainLabel):
        print('Run ' + str(count + 1) + ' out of ' + str(n_split))
        run_wname = '/scratch/manjuns/iceberg_ship_classifier/run_%s_weights.hdf5' % count
        print('Run weight name: ' + str(run_wname))
        tImg = trainImg[train_k]
        tLabel = trainLabel[train_k]

        # Stage 1: train on this fold's labelled data only.
        generator = self.gen.flow(tImg, tLabel)
        es, msave = self.callbacks(wname=run_wname)
        model = self.vgg_model_no_angle()
        model.fit_generator(generator, epochs=500, steps_per_epoch=24, verbose=1,
                            validation_data=(valImg, valLabel), callbacks=[es])
        predValues = model.predict(testLoader.X_train)
        # BUG FIX: this message contained a literal line break inside the
        # string, which is a SyntaxError; rejoined onto one line.
        print('Fold ' + str(count) + ' training 1 completed. Psuedolabeling test data.......')

        # Stage 2: add confident test predictions as pseudo-labels.
        # FIX: replaced the per-sample np.concatenate loop (O(n^2)) with a
        # single boolean-mask selection and one concatenate.
        confident = ((predValues < 0.15) | (predValues > 0.85)).ravel()
        tImg = np.concatenate((tImg, testLoader.X_train[confident]))
        tLabel = np.append(tLabel, predValues[confident] > 0.5)

        print('Fold ' + str(count) + ' training 2 commencing...')
        model_2 = self.vgg_model_no_angle()
        es, _ = self.callbacks(wname=self.run_weight_name)
        generator_2 = self.gen.flow(tImg, tLabel)
        # NOTE: msave from the first callbacks() call checkpoints this fit.
        model_2.fit_generator(generator_2, epochs=500, steps_per_epoch=24, verbose=1,
                              validation_data=(valImg, valLabel), callbacks=[es, msave])
        scores = model_2.evaluate(trainImg[test_k], trainLabel[test_k])
        print(scores)
        loss.append(scores[0])
        count += 1

    for i in range(len(loss)):
        print("Run " + str(i + 1) + ": " + str(loss[i]))
    print("")
    print("Loss Mean: " + str(np.mean(loss)) + " Loss std: " + str(np.std(loss)))

    # Reload the best fold's weights and generate the submission.
    minInd = loss.index(min(loss))
    print('Minimum Weight Index: ' + str(minInd))
    bestRunWeight = '/scratch/manjuns/iceberg_ship_classifier/run_%s_weights.hdf5' % minInd
    bestModel = self.vgg_model_no_angle()
    bestModel.load_weights(bestRunWeight)
    pred = bestModel.predict(testLoader.X_train)
    submission = pd.DataFrame()
    submission['id'] = testLoader.id
    submission['is_iceberg'] = pred.reshape((pred.shape[0]))
    submission.to_csv('sub_vgg_pl_1230.csv', index=False)
    return 0
"""CLI: convert a raw/dat pulsar data file into a FITS file with observation metadata."""
import argparse

from astropy.io import fits

from loadData import loader

parser = argparse.ArgumentParser(description='')
parser.add_argument('-d', '--datafile', required=True,
                    help='The location of the *.raw or *.dat data file.')
parser.add_argument('-f', '--fitsfile', required=True,
                    help='The location of the resulting *.fits fits file.')
parser.add_argument('--pulsarname', required=True,
                    help='The name of the pulsar, as noted in the database, \'pulsardata.txt\'.')
# BUG FIX: the help text here was a copy-paste of the --pulsarname help;
# replaced with the correct description (matches the sibling raw2fits tool).
parser.add_argument('--frequencymix', type=float, required=True,
                    help='The mixing frequency set on the receiver in MHz (e.g. 420)')
parser.add_argument('--obstime', required=True,
                    help='The start datetime of the observation, in isot format (e.g. 2018-03-10T14:00:00)')
args = parser.parse_args()

# Write the data as an ImageHDU whose header carries the observation metadata.
data = loader(args.datafile)
header = fits.Header()
header['SRC_NAME'] = args.pulsarname
header['FREQMIX'] = args.frequencymix
header['DATE-OBS'] = args.obstime
hdu = fits.ImageHDU(data, header)
hdu.writeto(args.fitsfile)
def __init__(self, fn_or_cfg):
    """Load a pulsar observation from a file name or a ``Config`` object.

    Supports FITS files (written by the companion raw2fits tool), ``.fil``
    filterbank files (via ``Waterfall``), and raw files, which require the
    Config to carry ``ObsMetaData`` (pulsar name, observation date, mixing
    frequency).

    Parameters
    ----------
    fn_or_cfg : Config or str
        A Config with ``FileName`` (and optionally ``FileFormat`` /
        ``ObsMetaData``), or a plain file path.
    """

    def _extension(name):
        # BUG FIX: the original used name.rstrip('.gz'), which strips ANY
        # trailing '.', 'g' or 'z' characters — e.g. 'x.img' -> 'x.im',
        # giving the bogus format 'im'. Strip the literal '.gz' suffix only.
        if name.endswith('.gz'):
            name = name[:-3]
        return name.split('.')[-1]

    # isinstance() is the idiomatic (and subclass-friendly) type check.
    if isinstance(fn_or_cfg, Config):
        cfg = fn_or_cfg
        filename = cfg.FileName
        fileformat = cfg.FileFormat if 'FileFormat' in cfg else _extension(cfg.FileName)
    elif isinstance(fn_or_cfg, str):
        cfg = None
        filename = fn_or_cfg
        fileformat = _extension(filename)
    self.fileformat = fileformat
    self.chisqperiod = None

    if fileformat == 'fits':
        hdulist = fits.open(filename)
        header = hdulist[1].header
        self.data = hdulist[1].data
        self.psr_name = header['SRC_NAME']
        self.obs_start_isot = header['DATE-OBS']
        self.obs_start = Time(self.obs_start_isot)
        self.mix_freq = header['FREQMIX']
        # An optional third HDU carries a previously fitted period.
        if len(hdulist) > 2:
            self.chisqperiod = hdulist[2].header['bestp']
    elif fileformat == 'fil':
        obs = Waterfall(filename)
        header = obs.header
        self.psr_name = header[b'source_name'].decode('utf-8')[4:]
        self.obs_start_isot = header[b'tstart']
        self.obs_start = Time(self.obs_start_isot, format='mjd')
        # Tammo-Jan doesn't store the mix frequency
        self.mix_freq = header[b'fch1'] - 21.668359375
        # These files have the frequency axis in decending, and include an extra unnecessary axis.
        self.data = np.squeeze(obs.data[:, :, -2::-1]).astype(np.uint16)
        # Some files have a missing frequency list for some reason. To avoid errors after folding, add some artificial data...
        self.data[:, ~np.any(self.data, axis=0)] += 65535
    else:
        # NOTE(review): assert is stripped under `python -O`; kept for
        # backward compatibility with callers catching AssertionError.
        assert cfg.ObsMetaData, 'Need to supply the metadata: observation time, mixing frequency and pulsar!'
        self.data = loader(cfg.FileName).astype(np.uint32)

    if cfg is not None and cfg.ObsMetaData:
        # Overwrite with metadata from the Config if present
        self.psr_name = cfg.ObsMetaData.PulsarName
        self.obs_start_isot = cfg.ObsMetaData.ObsDate
        self.obs_start = Time(self.obs_start_isot)
        self.mix_freq = cfg.ObsMetaData.MixFreq

    # Sample spacing: 64 * 512 raw samples at a 70 MHz clock.
    dt = 64 * 512 / (70e6)
    self.obs_dur = len(self.data) * dt
    self.obs_end = self.obs_start + self.obs_dur * u.s
    self.obs_times = np.arange(len(self.data)) * dt
    self.obs_middle = self.obs_dur * u.s / 2 + self.obs_start
    self.pulsar = Pulsar(pulsarname=self.psr_name, tobs=self.obs_start,
                         chisqperiod=self.chisqperiod)
    self.times = barcen_times(self.pulsar, len(self.data), obsstart=self.obs_start)
    self.freq_uncor = calc_central_freqs(self.mix_freq)
    self.freq = barcen_freqs(self.pulsar, self.freq_uncor, self.obs_middle)  # Probably unnecessary...
#!/usr/bin/env python
# coding: utf-8
"""Convert a raw/dat pulsar data file into a FITS file carrying observation metadata."""
import argparse

from astropy.io import fits

from loadData import loader


def raw2fits(data, fitsfile, PulsarName, MixFreq, ObsDate):
    """Write *data* to *fitsfile* as an ImageHDU whose header holds the metadata."""
    header = fits.Header()
    header['SRC_NAME'] = PulsarName
    header['FREQMIX'] = MixFreq
    header['DATE-OBS'] = ObsDate
    # Maybe we should use bintable instead at some point..
    hdu = fits.ImageHDU(data, header)
    hdu.writeto(fitsfile)


if __name__ == '__main__':
    cli = argparse.ArgumentParser(description='')
    cli.add_argument('-d', '--datafile', required=True,
                     help='The location of the *.raw or *.dat data file.')
    cli.add_argument('-f', '--fitsfile', required=True,
                     help='The location of the resulting *.fits fits file.')
    cli.add_argument('--pulsarname', required=True,
                     help='The name of the pulsar, as noted in the database, \'./small-data-files/pulsarcat.csv\'.')
    cli.add_argument('--frequencymix', type=float, required=True,
                     help='The mixing frequency set on the receiver in MHz (e.g. 420)')
    cli.add_argument('--obstime', required=True,
                     help='The start datetime of the observation, in isot format (e.g. 2018-03-10T14:00:00)')
    opts = cli.parse_args()
    raw2fits(loader(opts.datafile), opts.fitsfile,
             opts.pulsarname, opts.frequencymix, opts.obstime)