def haar_coeffs(lc):
    """Return the leading Haar-transform coefficients of a light curve.

    A working LightCurve is built from slices of ``lc.time`` and ``lc.flux``.
    If it holds fewer than 2**9 = 512 points, the flux is padded with the
    gap marker ``'-'`` (and the time axis continued in unit steps) up to
    512 entries.  The padded flux goes through ``utils.haar_transform``;
    any ``'-'`` in the result is replaced by ``0.0`` and the first 15
    values are returned.

    lc -- light curve exposing ``time`` and ``flux`` sequences; ``time``
          must be non-empty because its last entry seeds the padding.
    """
    n_coeffs = 15
    padded_len = 2 ** 9  # every curve is padded out to 512 points

    padded = LightCurve(lc.time[:], lc.flux[:])
    n_blanks = padded_len - len(padded.time)

    # Continue the time axis after the last observation and mark the
    # matching flux entries as missing.  A non-positive n_blanks adds
    # nothing (empty range, empty list).
    first_pad_time = int(lc.time[-1]) + 1
    padded.time += range(first_pad_time, first_pad_time + n_blanks)
    padded.flux += ['-'] * n_blanks

    spectrum = utils.haar_transform(padded.flux)

    # Missing-data markers become zero coefficients.
    cleaned = [0.0 if coeff == '-' else coeff for coeff in spectrum]
    return cleaned[:n_coeffs]
def available(lc, percent):
    """Return the leading ``percent`` percent of a light curve.

    lc      -- light curve exposing sliceable ``time`` and ``flux``.
    percent -- share of the observations to keep; must satisfy
               0 < percent <= 100.

    Returns a new LightCurve built from the first
    floor(len(lc.time) * percent / 100) entries of ``time`` and ``flux``.
    Raises AssertionError when ``percent`` is out of range.
    """
    assert 0 < percent <= 100, "percent must be in (0, 100]"
    # Multiply before dividing.  len * (percent / 100.0) rounds the
    # fraction first, so 100 * (29 / 100.0) == 28.999999999999996 and
    # floor() would silently drop one observation too many.
    avail_range = int(floor(len(lc.time) * percent / 100.0))
    return LightCurve(lc.time[:avail_range], lc.flux[:avail_range])
def signal_noise(lc, sig_noise_ratio):
    """Return a new light curve with Gaussian noise added to every flux value.

    lc              -- light curve exposing ``time`` and numeric ``flux``.
    sig_noise_ratio -- noise standard deviation, expressed as a multiple
                       of the standard deviation of ``lc.flux``.

    Each flux value gets an independent draw from
    ``numpy.random.normal(0, sig_noise_ratio * std(flux))``.  ``lc`` itself
    is left untouched; the result is built from a copy of ``lc.time``.
    An empty light curve yields an empty result.
    """
    # numpy.std is the population standard deviation, the same quantity as
    # sqrt(E[x^2] - E[x]^2).  The expanded formula can dip below zero
    # through rounding when the flux is (nearly) constant, which makes the
    # square root fail; numpy.std cannot.
    sigma = numpy.std(lc.flux) if len(lc.flux) else 0.0
    noise_amt = sig_noise_ratio * sigma

    noisy_flux = [x + numpy.random.normal(0, noise_amt) for x in lc.flux]
    # Copy the time axis so the new curve does not share a list with lc.
    return LightCurve(lc.time[:], noisy_flux)
def lc_to_features(lc):
    """Build the feature vector for one light curve.

    The result is the concatenation, in this order, of
    ``flux_only``, ``time_flux``, ``haar_coeffs`` and
    ``spectral_features``.  ``haar_coeffs`` receives a copy of the curve
    taken before gap removal; the other three receive the gap-free curve
    with its flux centred on the mean and scaled by the standard
    deviation.

    NOTE: ``lc`` is modified in place by ``lc.remove_gaps()``.
    """
    lc_with_gaps = lc.copy()  # snapshot taken before the gaps are removed
    lc.remove_gaps()

    flux = lc.flux[:]
    flux_mean = numpy.mean(flux)
    flux_std = numpy.std(flux)

    # A (near-)flat curve has no spread; dividing by it would fill the
    # features with NaN/inf.  Use the same 1e-10 cut-off as normalise()
    # and only centre the flux in that case.
    if flux_std > 1e-10:
        centered_flux = [(e - flux_mean) / (1.0 * flux_std) for e in flux]
    else:
        centered_flux = [e - flux_mean for e in flux]

    lc_centered = LightCurve(lc.time[:], centered_flux)
    return (flux_only(lc_centered)
            + time_flux(lc_centered)
            + haar_coeffs(lc_with_gaps)
            + spectral_features(lc_centered))
def distribute(lc):
    """Rescale a light curve to a randomly drawn scale and offset.

    The curve is first passed through ``normalise``.  Two values are then
    drawn independently as ``uniform(1000**-2.3, 1) ** (1 / -2.3)``: the
    first becomes an additive offset, the second a multiplicative scale.
    Every flux value ``f`` of the normalised curve maps to
    ``f * scale + offset``.

    Returns a new LightCurve holding a copy of the time axis and the
    rescaled flux.
    """
    lc = normalise(lc)  # normalise first

    lower = 1
    upper = 1000
    exponent = -2.3

    # Draw order matters for reproducibility: offset first, then scale.
    offset = random.uniform(upper ** exponent, lower) ** (1 / exponent)
    scale = random.uniform(upper ** exponent, lower) ** (1 / exponent)

    rescaled = [value * scale + offset for value in lc.flux]
    return LightCurve(lc.time[:], rescaled)
def sample(lc, maxlen):
    """Thin a light curve so that it holds at most ``maxlen`` observations.

    lc     -- light curve exposing sliceable ``time`` and ``flux``.
    maxlen -- upper bound on the number of observations to keep.

    A curve that already fits is returned unchanged (the same object).
    Otherwise every k-th observation is kept, starting with the first,
    where k = ceil(len(lc.flux) / maxlen), and a new LightCurve is
    returned.

    Raises ValueError if the curve must be thinned and ``maxlen`` < 1.
    """
    n_points = len(lc.flux)
    if n_points <= maxlen:
        return lc
    if maxlen < 1:
        raise ValueError("maxlen must be at least 1")

    # Ceiling division.  Rounding the stride down instead lets the result
    # grow to almost 2 * maxlen points (19 points with maxlen=10 would
    # give stride 1 and keep all 19).
    sparsity = int(-(-n_points // maxlen))
    return LightCurve(lc.time[::sparsity], lc.flux[::sparsity])
def normalise(lc, new_mean=1):
    """Centre the flux of ``lc`` on zero and scale it by its spread.

    Each flux value has the mean subtracted and, when the standard
    deviation exceeds 1e-10, is divided by that standard deviation.

    NOTE: ``lc.flux`` is overwritten in place; the returned LightCurve is
    built from copies of ``lc.time`` and the updated ``lc.flux``.
    ``new_mean`` is accepted but not used.
    """
    centre = numpy.average(lc.flux)
    spread = numpy.std(lc.flux)
    has_spread = spread > 1e-10

    for idx, value in enumerate(lc.flux):
        shifted = value - centre
        # very flat signal - dividing should not really change it much
        lc.flux[idx] = shifted / spread if has_spread else shifted

    return LightCurve(lc.time[:], lc.flux[:])
def gapify(lc, gap_amt):
    """Return a copy of ``lc`` with some flux values replaced by ``'-'``.

    lc      -- light curve exposing sliceable ``time`` and ``flux``.
    gap_amt -- percentage of the observations to blank out.

    The number of points to blank is floor(len(flux) * gap_amt / 100),
    reduced so that at least 5 points stay untouched.  Gaps are carved in
    runs of 1, 2 or 5 points at random positions: each run is filled
    forwards from its start and, if the end of the curve is reached first,
    backwards from the start.  Index 0 is never blanked.  The time axis is
    copied unchanged.
    """
    min_points_kept = 5
    gap_sizes = [1, 2, 5]
    blank = '-'

    time = lc.time[:]
    flux = lc.flux[:]
    n_points = len(flux)

    to_remove = int(floor(n_points * gap_amt / 100.0))
    if len(time) - to_remove < min_points_kept:
        to_remove = len(time) - min_points_kept

    removed = 0
    while removed < to_remove:
        # RNG order is part of the behaviour: run length first, then start.
        pending = min(random.choice(gap_sizes), to_remove - removed)
        removed += pending
        start = random.randrange(1, n_points - pending)

        # work forwards from the insertion position
        for position in range(start, n_points):
            if pending <= 0:
                break
            if flux[position] != blank:
                flux[position] = blank
                pending -= 1

        # ran out of space: work backwards from the insertion position
        for position in range(start, 0, -1):
            if pending <= 0:
                break
            if flux[position] != blank:
                flux[position] = blank
                pending -= 1

    return LightCurve(time[:], flux[:])
def preprocess(self):
    """Create the distorted copy of every raw light curve, if not yet present.

    Reads ``self.LC_DIRECTORY``, ``self.RAW_LC_DIRECTORY`` and
    ``self.file_prefix`` to locate the raw and output directories.  When
    the output directory already holds as many entries as the raw
    directory, nothing is done.  Otherwise each raw file is parsed into a
    LightCurve, passed through ``all_distortions`` with ``self.noise``,
    ``self.available_pct``, ``self.missing`` and ``self.pl_distribute``,
    and written to the output directory as tab-separated ``time``/``flux``
    lines under the same file name.

    Raw files may be tab-, comma- or whitespace-separated; blank lines are
    skipped.  Only the first two fields of a line are used.

    Raises Exception('no distribution chosen') when neither
    ``self.pl_distribute`` nor ``self.normalise`` is set.
    """
    progress_increment = 50
    raw_directory = self.LC_DIRECTORY + '/' + self.RAW_LC_DIRECTORY
    new_directory = self.LC_DIRECTORY + '/' + self.file_prefix

    # List the raw directory once instead of on every progress line.
    raw_files = os.listdir(raw_directory)
    total = len(raw_files)

    if self.file_prefix in os.listdir(self.LC_DIRECTORY):
        if len(os.listdir(new_directory)) == total:
            print("directory exists: " + new_directory)
            return
        # Incomplete output directory: keep it and rewrite every file.
    else:
        os.mkdir(new_directory)

    for done, lc_file in enumerate(raw_files):
        if done % progress_increment == 0:
            print("{0}/{1}".format(done, total))

        lc = LightCurve()  # see lightcurve.py

        # read in all lc data
        with open(raw_directory + '/' + lc_file) as lc_data:
            for line in lc_data:
                line = line.strip()
                if not line:
                    continue  # blank lines carry no observation
                if '\t' in line:
                    fields = line.split('\t')
                elif ',' in line:
                    fields = line.split(',')
                else:
                    fields = line.split()
                lc.time.append(float(fields[0].replace(',', '')))
                lc.flux.append(float(fields[1].replace(',', '')))

        # check the parameters
        if (not self.pl_distribute) and (not self.normalise):
            print("no distribution chosen...")
            raise Exception('no distribution chosen')

        lc = all_distortions(lc, self.noise, self.available_pct,
                             self.missing,
                             self.pl_distribute)  # see distortions.py

        # Write into the directory that was checked/created above, not a
        # hard-coded 'lightcurves/' path, so a different LC_DIRECTORY works.
        with open(new_directory + '/' + lc_file, 'w') as lc_out:
            for time, flux in zip(lc.time, lc.flux):
                lc_out.write('{0}\t{1}\n'.format(str(time), str(flux)))
import sys

from lightcurve import LightCurve

# Takes the file name of a light curve as the command-line argument.
# For running on the HPC.

if len(sys.argv) < 2:
    # Fail with a usage message rather than a bare IndexError traceback.
    sys.exit("usage: {0} <lightcurve-file>".format(sys.argv[0]))

# The first character of the argument is dropped before it is appended to
# the data directory.
# NOTE(review): presumably this strips a leading '.' from './name' - confirm.
fname = '/home/apps/astro/DATA/CRTSQSO' + sys.argv[1][1:]
lc = LightCurve(fname)

# The output file is named after the last path component of the input.
lc.analyze("/home/uchile/cmm/astrolab/student01/mjh/outfiles/" + fname.split('/')[-1])
# Fragment: brute-force search over all ordered pairs of observations for
# the steepest rising and steepest falling slope, followed by plotting.
# NOTE(review): flux, time, ax, features, best_decrease, best_increase and
# the initial value of final_fp_dec are bound outside this fragment -
# confirm against the enclosing code.
final_fp_inc = None
final_sp_dec = None
final_sp_inc = None
# fp = first point, sp = second point; sp always comes after fp.
for fp in xrange(len(flux) - 1):
    for sp in xrange(fp + 1, len(flux)):
        slope = (flux[sp] - flux[fp]) / (time[sp] - time[fp])
        # Track the most negative slope and the pair that produced it.
        if slope < best_decrease:
            best_decrease = slope
            final_sp_dec = sp
            final_fp_dec = fp
        # Track the most positive slope and the pair that produced it.
        if slope > best_increase:
            best_increase = slope
            final_sp_inc = sp
            final_fp_inc = fp
lc = LightCurve(time, flux)
# spt is not used again within this fragment.
spt = features.slope_pair_trends(lc)
print "inc"
print final_sp_inc
print final_fp_inc
print "dec"
print final_sp_dec
print final_fp_dec
# Draw the steepest increase in green and the steepest decrease in blue.
# The indices stay None if no pair beat the starting best_* values.
ax.plot([time[final_sp_inc], time[final_fp_inc]], [flux[final_sp_inc], flux[final_fp_inc]], 'g-x')
ax.plot([time[final_sp_dec], time[final_fp_dec]], [flux[final_sp_dec], flux[final_fp_dec]], 'b-x')
textstr = ""
textstr += '$\mathrm{best\\_increase}=%.2f$\n$\mathrm{best\\_decrease}=%.2f$\n' % (
    best_increase, best_decrease)
import sys

from lightcurve import LightCurve

# Takes the file name of a light curve as the command-line argument.
# For running on the HPC.

if len(sys.argv) < 2:
    # Fail with a usage message rather than a bare IndexError traceback.
    sys.exit("usage: {0} <lightcurve-file>".format(sys.argv[0]))

# The first character of the argument is dropped before it is appended to
# the data directory.
# NOTE(review): presumably this strips a leading '.' from './name' - confirm.
fname = '/home/apps/astro/DATA/CRTSQSO' + sys.argv[1][1:]
lc = LightCurve(fname)

# The output file is named after the last path component of the input.
lc.analyze("/home/uchile/cmm/astrolab/student01/mjh/outfiles/" + fname.split('/')[-1])