def mod(self, parameter_s='', local_ns=None):
    """ Define the type of model that will be adjusted to the data.
    Type 'mod -h' for more help

    Parameters
    ----------
    parameter_s: string
        Raw argument string, parsed with parse_arg_string('mod', ...).
    local_ns: dict
        Namespace in which the 'default' system is looked up.

    Returns
    -------
    nothing; the model is stored in system.model under the keys 'k'
    (number of keplerians) and 'd' (degree of the drift).
    """
    from shell_colors import yellow, blue, red
    args = parse_arg_string('mod', parameter_s)
    if args == 1:  # called without arguments, show how it's done
        msg = yellow('Usage: ') + 'mod [k<n>] [d<n>]\n' + \
              'Options: k<n> Number of keplerian signals\n' + \
              ' d<n> Degree of polynomial drift'
        clogger.fatal(msg)
        return

    # local_ns defaults to None; testing membership on None would raise a
    # TypeError instead of showing the message below
    if local_ns is None or 'default' not in local_ns:
        msg = red('ERROR: ') + 'Set a default system or provide a system '+\
                               'name with the -n option'
        clogger.fatal(msg)
        return

    system = local_ns['default']
    if system.model is None:
        system.model = {}
    # NOTE(review): only the element at index 1 of each argument is read;
    # if the arguments are strings such as 'k2' this presumably limits <n>
    # to a single digit -- confirm against parse_arg_string
    system.model['k'] = k = int(args[0][1])
    system.model['d'] = d = int(args[1][1])

    # this should be logged?
    print('%s %s kep, %s drifts' % (blue('Current model:'), k, d))
def fit_gp(model, initial, data, ncpu, nwalkers=20):
    """ Sample a model with emcee's ensemble sampler.

    Parameters
    ----------
    model: sequence with four items
        Unpacked as (k, lnlike, lnprior, lnprob); only lnprob is used here.
    initial: sequence
        Starting parameter vector; every walker starts at a randomly
        perturbed copy of it.
    data:
        Handed to the sampler as the extra arguments of lnprob.
    ncpu:
        Not used in this function (the sampler is created with threads=1).
    nwalkers: int
        Number of walkers.

    Returns
    -------
    sampler, p, logl:
        The sampler, the walker position with the highest log-probability
        after the last step, and an array with the highest log-probability
        of each step of the production run.
    """
    _, _, _, lnprob = model
    ndim = len(initial)

    walkers = []
    for _ in range(nwalkers):
        scatter = rel(initial, 1) * np.random.randn(ndim)
        walkers.append(np.array(initial) + scatter)

    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=data, threads=1)

    clogger.info(blue(' :: ') + 'Running burn-in...')
    # two burn-in stages; the stored chain is thrown away after each of them
    for burn_steps in (100, 200):
        walkers, lnp, _ = sampler.run_mcmc(walkers, burn_steps)
        sampler.reset()

    niter = 1000
    clogger.info(blue(' :: ') +
                 'Running %d MCMC chains for %d iterations...' % (nwalkers, niter))

    best_per_step = []
    start = time()
    for walkers, lnp, _ in sampler.sample(walkers, None, None, iterations=niter):
        best_per_step.append(max(lnp))
    stop = time()

    logl = np.array(best_per_step)
    p = walkers[np.argmax(lnp)]
    clogger.info(blue(' :: ') + 'MCMC took %f seconds' % (stop - start))
    return sampler, p, logl
def write_rv(system, filename, **kwargs):
    """ Write system's RVs (and everything else) to a file.

    Parameters
    ----------
    system: instance of rvSeries
        System for which to save information.
    filename: string
        Name of the output file. If it already exists the user is asked
        whether to replace it.

    Optional kwargs
    ---------------
    (none are used at the moment)

    Returns
    -------
    nothing
    """
    if os.path.isfile(filename):
        # this file exists
        clogger.warning(yellow('Warning: ')+'File already exists. Replace? [Y/n]')
        if not ask_yes_no(' ', default=True):
            clogger.info(blue('INFO: ')+'Aborting.')
            return

    # the two header lines are copied from one of the files in the provenance
    with open(list(system.provenance)[0]) as f:
        header1 = f.readline()
        header2 = f.readline()

    # every column name after the first three is looked up in the extras
    extras_original_order = header1.split()[3:]

    if system.units == 'm/s':
        # deviations from the mean and uncertainties are scaled by 1e-3,
        # the mean itself is left untouched
        mean_vrad = system.vrad.mean()
        X = [system.time, (system.vrad - mean_vrad)*1e-3 + mean_vrad, system.error*1e-3]
    else:
        X = [system.time, system.vrad, system.error]

    fields = system.extras._fields
    X.extend(system.extras[fields.index(e)] for e in extras_original_order)

    # one format per column that is actually written; counting the extras
    # of the system instead would break savetxt whenever the header lists
    # a different number of columns
    fmt = ['%12.6f', '%8.5f', '%7.5f'] + ['%7.5f']*(len(X) - 3)
    savetxt(filename, list(zip(*X)), fmt=fmt, delimiter='\t',
            header=header1+header2[:-1], comments='')
    clogger.info(blue('INFO: ')+'Wrote to file '+filename)
def to_mps(self, parameter_s='', local_ns=None):
    """ Convert data to meters per second, if in km/s.

    The system is the 'default' one found in local_ns unless the -n option
    is given, in which case it is looked up by the name in args['SYSTEM'].
    The conversion is only done when the smallest uncertainty is below 0.01.

    Parameters
    ----------
    parameter_s: string
        Raw argument string, parsed with parse_arg_string('to_mps', ...).
    local_ns: dict
        Namespace holding the systems.

    Returns
    -------
    nothing; the system is modified in place.
    """
    from shell_colors import blue
    args = parse_arg_string('to_mps', parameter_s)

    # a missing namespace behaves like an empty one, so that the lookup
    # below ends in the error message instead of a TypeError
    namespace = {} if local_ns is None else local_ns

    # use default system or user defined
    try:
        if 'default' in namespace and not args['-n']:
            system = namespace['default']
        else:
            system_name = args['SYSTEM']
            system = namespace[system_name]
    except KeyError:
        from shell_colors import red
        msg = red('ERROR: ') + 'Set a default system or provide a system '+\
                               'name with the -n option'
        clogger.fatal(msg)
        return

    if min(system.error) < 0.01:
        msg = blue('INFO: ') + 'Converting to m/s'
        clogger.info(msg)
        # same convention as the `read` command: the deviations from the
        # mean are scaled and the mean is added back
        mean_vrad = mean(system.vrad)
        system.vrad = (system.vrad - mean_vrad) * 1e3 + mean_vrad
        system.error *= 1e3
        # the *_full vectors are the source for later restrictions, so they
        # have to be in the same units as the working vectors
        if hasattr(system, 'vrad_full'):
            mean_full = mean(system.vrad_full)
            system.vrad_full = (system.vrad_full - mean_full) * 1e3 + mean_full
        if hasattr(system, 'error_full'):
            system.error_full *= 1e3
        system.units = 'm/s'
def onpick3(event):
    """ Mouse-click callback that closes the current data chunk.

    Logs the limits of the chunk, appends the x coordinate of the click to
    chunkx and increments the global chunk counter. Events without data
    coordinates are ignored.
    """
    global chunkid
    x = event.xdata
    if x is None:
        # matplotlib reports xdata=None for clicks outside the axes; there
        # is no time to record and formatting None with %8.2f would raise
        return
    msg = blue(' : ')
    msg += 'chunk %d: %8.2f --> %8.2f' % (chunkid, chunkx[chunkid-1], x)
    clogger.info(msg)
    chunkx.append(x)
    chunkid += 1
def onpick3(event):
    """ Pick callback that marks the first picked observation for removal.

    The index is appended to indices_to_remove and the observation number
    (counted from 1), its time and its RV are logged.
    """
    global times
    picked = event.ind
    first = picked[0]
    obs_time = np.take(system.time, picked)[0]
    obs_rv = np.take(system.vrad, picked)[0]
    indices_to_remove.append(first)
    clogger.info(blue(' : ') +
                 'going to remove observation %d -> %8.2f, %8.2f' % (first+1, obs_time, obs_rv))
def do_correlate(system, vars=(), verbose=False):
    """ Correlate two quantities of a system and plot one against the other.

    Parameters
    ----------
    system:
        System providing `vrad` and the `extras` fields.
    vars: sequence of two names
        Each name is either 'vrad' or one of system.extras._fields.
        Nothing is done if the sequence does not have exactly two items.
    verbose: bool
        If True, print the Pearson and Spearman results.

    Returns
    -------
    nothing
    """
    # just to be sure, but this should not pass through docopt in commands.py
    if len(vars) != 2:
        return
    name1, name2 = vars[0], vars[1]

    extra_fields = system.extras._fields

    # handle inexistent fields
    allowed = extra_fields + ('vrad',)
    for name in (name1, name2):
        if name not in allowed:
            clogger.fatal(red('ERROR: ') +
                          'The name "%s" is not available for correlation.\n' % name)
            return

    def values_of(name):
        # 'vrad' lives on the system itself, everything else in the extras
        if name == 'vrad':
            return system.vrad
        return system.extras[extra_fields.index(name)]

    v1 = values_of(name1)
    v2 = values_of(name2)

    pr = pearsonr(v1, v2)
    sr = spearmanr(v1, v2)

    if verbose:
        print(blue('[Pearson correlation]') + ' r=%f, p-value=%f' % pr)
        print(blue('[Spearman correlation]') + ' r=%f, p-value=%f' % sr)

    # label =
    figure()
    plot(v1, v2, 'o')
    xlabel(name1)
    ylabel(name2)
    tight_layout()
    show()
def read(self, parameter_s='', local_ns=None):
    """ Read files with RV measurements.
    Type 'read -h' for more help

    The argument string is parsed with parse_arg_string('read', ...).
    Without the -d option the new rvSeries is returned to the caller; with
    -d it is stored in local_ns['default'], its statistics are shown when
    --verbose is given (and --quiet is not) and, unless --nomps is given,
    it is converted to m/s when its smallest uncertainty is below 0.01.
    """
    try:
        args = parse_arg_string('read', parameter_s)
    except DocoptExit:
        # the arguments did not match the usage pattern: show the usage
        print read_usage.lstrip()
        return
    except SystemExit:
        return

    # take care of glob (and tilde) expansions
    files = args['<file>']
    # hack for metal-poor files
    # NOTE(review): a single argument starting with 'HD' is replaced by a
    # path below a hard-coded, machine-specific directory
    if len(files) == 1 and files[0].startswith('HD'):
        files = ['/home/joao/phd/data/'+files[0]+'_harps_mean_corr.rdb']
    ##
    globs = [glob.glob(expanduser(f)) for f in files]
    filenames = list(chain.from_iterable(globs)) # some magic...

    # if 'default' system is already set, return the rvSeries class
    # this is useful when working with various systems simultaneously so
    # that we can do, e.g., HDXXXX = %read file1 file2
    if not args['-d']:
        try:
            return rvSeries(*filenames, skip=args['--skip'], verbose=not args['--quiet'])
        except AttributeError:
            # NOTE(review): the AttributeError is swallowed and execution
            # continues with the code below, which expects
            # local_ns['default'] to exist -- confirm this is intended
            pass
    else:
        try:
            local_ns['default'] = rvSeries(*filenames, skip=args['--skip'], verbose=not args['--quiet'])
        except IOError:
            return

    default = local_ns['default']

    if args['--verbose'] and not args['--quiet']:
        default.stats()

    if (min(default.error) < 0.01 and not args['--nomps']):
        from shell_colors import blue
        mean_vrad = mean(default.vrad)

        if not args['--quiet']:
            # msg = blue('INFO: ') + 'Converting to m/s and subtracting mean value of %f' % mean_vrad
            msg = blue('INFO: ') + 'Converting to m/s'
            clogger.info(msg)

        # deviations from the mean are scaled by 1e3 and the mean is added
        # back; the same is done for the *_full vectors
        default.vrad = (default.vrad - mean_vrad)*1e3 + mean_vrad
        default.error *= 1e3
        default.vrad_full = (default.vrad_full - mean(default.vrad_full))*1e3 + mean(default.vrad_full)
        default.error_full *= 1e3
        default.units = 'm/s'
def do_multinest(system):
    """ Write the system's data to 'input.rv' and launch MultiNest.

    The file gets a two-line header (a description ending with the
    module-level timestamp, then the number of observations) followed by
    the time, RV and uncertainty columns. MultiNest is started through
    mpirun with two processes; its exit status is not checked.
    """
    clogger.info(blue('INFO: ') + 'Transfering data to MultiNest...')

    # write data to file to be read by MultiNest
    header_lines = [
        'file automatically generated for MultiNest analysis, ' + timestamp,
        str(len(system.time)),
    ]
    columns = zip(system.time, system.vrad, system.error)
    savetxt('input.rv', columns,
            header='\n'.join(header_lines),
            fmt=['%12.6f', '%7.5f', '%7.5f'])

    clogger.info(blue('INFO: ') + 'Starting MultiNest...')
    subprocess.call('mpirun -np 2 ./OPEN/multinest/nest', shell=True)
    return
def selectable_plot(system, **kwargs):
    """ Show the RVs in a plot where points can be picked for removal.

    Every picked point is logged and remembered; the function waits for
    the user to press ENTER in the terminal and then closes the figure.

    Returns
    -------
    The result of unique() applied to the picked indices.
    """
    from shell_colors import yellow, blue
    from .logger import clogger

    clogger.info(blue('INFO: ') + 'Click on a point in the plot to remove it.')
    clogger.info(blue(' : ') + 'Press ENTER when you are finished')

    indices_to_remove = []
    global times
    times = 0

    def onpick3(event):
        global times
        picked = event.ind
        first = picked[0]
        obs_time = np.take(system.time, picked)[0]
        obs_rv = np.take(system.vrad, picked)[0]
        indices_to_remove.append(first)
        clogger.info(blue(' : ') +
                     'going to remove observation %d -> %8.2f, %8.2f' % (first+1, obs_time, obs_rv))

    fig, ax = plt.subplots()
    ax.errorbar(system.time, system.vrad, system.error, fmt='o')
    # the scatter on top of the error bars is what makes the points pickable
    ax.scatter(system.time, system.vrad, picker=True)
    ax.set_xlabel('Time [days]')
    ax.set_ylabel('RV [%s]'%system.units)
    fig.canvas.mpl_connect('pick_event', onpick3)
    plt.show()

    # wait for user input to finish
    raw_input('')
    plt.close(fig)
    return unique(indices_to_remove)
def selectable_plot_chunks(system, **kwargs):
    """ Let the user split the data in chunks by clicking on a plot.

    Every mouse click inside the axes closes the current chunk at the
    clicked x coordinate. The function waits for the user to press ENTER
    in the terminal and then closes the figure.

    Returns
    -------
    chunkx: list
        The smallest time of the system followed by the x coordinate of
        each click, in click order.
    """
    from shell_colors import yellow, blue
    from .logger import clogger

    msg = blue('INFO: ') + 'Click on the plot to select the data chunks.'
    clogger.info(msg)
    msg = blue(' : ') + 'Press ENTER when you are finished'
    clogger.info(msg)
    print('')

    chunkx = []
    chunkx.append(system.time.min())
    global chunkid
    chunkid = 1

    def onpick3(event):
        global chunkid
        x = event.xdata
        if x is None:
            # matplotlib reports xdata=None for clicks outside the axes;
            # such a click carries no time and would otherwise crash the
            # %8.2f formatting and leave a None in chunkx
            return
        msg = blue(' : ')
        msg += 'chunk %d: %8.2f --> %8.2f' % (chunkid, chunkx[chunkid-1], x)
        clogger.info(msg)
        chunkx.append(x)
        chunkid += 1

    fig, ax = plt.subplots()
    ax.errorbar(system.time, system.vrad, system.error, fmt='o', picker=True)
    # col = ax.scatter(system.time, system.vrad, picker=True)
    ax.set_xlabel('Time [days]')
    ax.set_ylabel('RV [%s]'%system.units)
    ax.margins(0.1)
    fig.canvas.mpl_connect('button_press_event', onpick3)

    # wait for user input to finish
    raw_input('')
    plt.close(fig)
    return chunkx
def _restrict_with_mask(system, keepers):
    """ Keep only the observations selected by the boolean vector keepers. """
    # pop out the values from time, vrad, and error
    # leaving all *_full vectors intact
    system.time = system.time_full[keepers]
    system.vrad = system.vrad_full[keepers]
    system.error = system.error_full[keepers]
    # also from extras, but this is trickier
    d = system.extras._asdict()  # asdict because namedtuple is immutable
    for i, field in enumerate(system.extras._fields):
        d[field] = system.extras_full[i][keepers]
    extra = namedtuple('Extra', system.extras_names)
    system.extras = extra(**d)


def _years_of(times):
    """ Array with the first item julian_day_to_date returns for each time. """
    return np.array([julian_day_to_date(t)[0] for t in times])


def do_restrict(system, quantity, *args):
    """ Restrict the data of a system to a subset of the observations.

    Parameters
    ----------
    system:
        System to restrict. The *_full vectors are never modified; time,
        vrad, error and extras are rebuilt from them.
    quantity: string
        'error' -- args[0] is the maximum uncertainty (currently only
                   logged, no data is removed);
        'date'  -- keep observations strictly between args[0] and args[1];
        'year'  -- keep observations from the year args[0];
        'years' -- keep observations from args[0] to args[1], inclusive.
        Any other value does nothing.

    Returns
    -------
    nothing

    NOTE(review): the second element of every system.provenance entry is
    updated per file, but the branches do not agree on what they store:
    'date' counts the removed points and stores n1 - count, 'year(s)' count
    the kept points and store n1 - count. The bookkeeping is left as it
    was -- confirm which meaning the consumers of provenance expect.
    """
    ## restrict by uncertainty value
    if quantity == 'error':
        msg = blue('INFO: ') + 'Removing data with uncertainty higher than %f km/s' % args[0]
        clogger.info(msg)
        # the restriction itself was already disabled by an early return;
        # the unreachable code that followed it has been dropped
        return

    ## restrict by date (JD)
    if quantity == 'date':
        msg = blue('INFO: ') + 'Retaining data between %i and %i JD' % (args[0], args[1])
        clogger.info(msg)
        minjd, maxjd = args[0], args[1]
        # we have to keep a record of how many values come out of each file
        remaining = system.time_full
        for fname, (n1, n2) in sorted(system.provenance.items()):
            lower = remaining[:n1] <= minjd
            # this used to test "<= maxjd", which counted the points below
            # the upper limit instead of the ones above it
            higher = remaining[:n1] >= maxjd
            nout = lower.sum() + higher.sum()
            # system.provenance keeps the record
            if nout >= n1:
                system.provenance[fname][1] = n1
            else:
                system.provenance[fname][1] = n1 - nout
            remaining = remaining[n1:]

        # now build the full boolean vector
        lowers = system.time_full <= minjd
        highers = system.time_full >= maxjd
        # keep whatever is neither too early nor too late
        keepers = ~(lowers | highers)
        _restrict_with_mask(system, keepers)

    ## restrict to values from one year
    if quantity == 'year':
        msg = blue('INFO: ') + 'Retaining data from %i' % args[0]
        clogger.info(msg)
        yr = args[0]
        # we have to keep a record of how many values come out of each file
        remaining = system.time_full
        for fname, (n1, n2) in sorted(system.provenance.items()):
            nkept = (_years_of(remaining[:n1]) == yr).sum()
            # system.provenance keeps the record
            if nkept >= n1:
                system.provenance[fname][1] = n1
            else:
                system.provenance[fname][1] = n1 - nkept
            remaining = remaining[n1:]

        # build the full boolean vector
        keepers = _years_of(system.time_full) == yr
        _restrict_with_mask(system, keepers)

    ## restrict to values from a year range
    if quantity == 'years':
        msg = blue('INFO: ') + 'Retaining data between %i and %i' % (args[0], args[1])
        clogger.info(msg)
        yr1, yr2 = args[0], args[1]
        # we have to keep a record of how many values come out of each file
        remaining = system.time_full
        for fname, (n1, n2) in sorted(system.provenance.items()):
            years = _years_of(remaining[:n1])
            nkept = ((years >= yr1) & (years <= yr2)).sum()
            # system.provenance keeps the record
            if nkept >= n1:
                system.provenance[fname][1] = n1
            else:
                system.provenance[fname][1] = n1 - nkept
            remaining = remaining[n1:]

        # now build the full boolean vector
        years = _years_of(system.time_full)
        keepers = (years >= yr1) & (years <= yr2)
        _restrict_with_mask(system, keepers)

    return
def do_genetic(system, just_gen=False):
    """ Carry out the fit using a genetic algorithm and if
    just_gen=False try to improve it with a run of the LM algorithm

    Parameters
    ----------
    system:
        System to fit. system.model must hold the number of keplerians
        ('k') and the degree of the drift ('d'); system.fit is reset.
    just_gen: bool
        If True, stop after the genetic algorithm.

    Returns
    -------
    nothing; the result is stored in system.fit['params'] and
    system.fit['chi2']. With just_gen=True the stored chi2 is divided by
    len(system.time) - npar, otherwise it is the plain chi2 of the LM
    solution.
    """
    try:
        degree = system.model['d']
        keplerians = system.model['k']
    except TypeError:
        # system.model is still None
        msg = red('Error: ') + 'Need to run mod before gen. '
        clogger.error(msg)
        return

    # only needed by the alternative gaussian prior on the period, which is
    # not used at the moment
    maxP = system.per.get_peaks(output_period=True)[1]
    size_maxP = 10**(len(str(int(maxP)))-1)

    system.fit = {}

    msg = blue('INFO: ') + 'Initializing genetic algorithm...'
    clogger.info(msg)
    msg = blue(' : ') + 'Model is: %d keplerians + %d drift' % (keplerians, degree)
    clogger.info(msg)

    # work array that get_rvn fills in with the model velocities
    vel = zeros_like(system.time)

    def chi2_n(individual):
        """ Fitness function for N planet model """
        # genes are stored planet after planet, six per planet
        P, K, ecc, omega, T0, gam = [individual[i::6] for i in range(6)]
        get_rvn(system.time, P, K, ecc, omega, T0, gam[0], vel)
        chi2 = sum(((system.vrad - vel)/system.error)**2)
        return chi2,

    ## create the required types -- the fitness and the individual.
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # minimization of a single objective
    creator.create("Individual", list, fitness=creator.FitnessMin)

    ## create parameters by sampling from their priors
    def P_prior():
        return random.uniform(5, 1000)

    def K_prior():
        return random.uniform(0, 150)

    def ecc_prior():
        return random.uniform(0, 0.9)

    def om_prior():
        return random.uniform(0, 360)

    def t0_prior():
        return random.uniform(2350000, 2550000)

    def gamma_prior():
        return random.uniform(-100, 100)

    priors = [P_prior, K_prior, ecc_prior, om_prior, t0_prior, gamma_prior]

    toolbox = base.Toolbox()
    toolbox.register("individual", tools.initCycle, creator.Individual, priors, n=keplerians)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def mutPrior(individual, indpb):
        # every gene can be redrawn from its own prior. The priors repeat
        # every len(priors) genes; pairing the genes with the prior list
        # through zip only ever reached the genes of the first keplerian
        for i in range(len(individual)):
            if random.random() < indpb:
                individual[i] = priors[i % len(priors)]()
        return individual,

    toolbox.register("evaluate", chi2_n)
    toolbox.register("mate", tools.cxTwoPoints)
    toolbox.register("mutate", mutPrior, indpb=0.10)
    toolbox.register("select", tools.selTournament, tournsize=3)

    npop = 500
    ngen = 150
    npar = 5*keplerians+1
    dof = len(system.time)-npar

    ## build the population
    pop = toolbox.population(n=npop)

    ## helper functions
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", nanmean)
    stats.register("min", np.nanmin)
    stats.register("red", lambda v: min(v)/(len(system.time)-npar) )

    msg = blue('INFO: ') + 'Created population with N=%d. Going to evolve for %d generations...' % (npop,ngen)
    clogger.info(msg)

    ## run the genetic algorithm
    algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=ngen, stats=stats, halloffame=hof, verbose=True)

    def print_planets(params):
        """ Print a header and one row of parameters per keplerian. """
        print("%3s %12s %10s %10s %10s %15s %9s" % \
            ('', 'P[days]', 'K[km/s]', 'e', unichr(0x3c9).encode('utf-8')+'[deg]', 'T0[days]', 'gam') )
        P, K, ecc, omega, T0, gam = [params[j::6] for j in range(6)]
        for i, planet in enumerate(ascii_lowercase[:keplerians]):
            print("%3s %12.1f %10.2f %10.2f %10.2f %15.2f %9.2f" %
                  (planet, P[i], K[i], ecc[i], omega[i], T0[i], gam[i]) )

    ## output results information
    msg = yellow('RESULT: ') + 'Best individual is'
    clogger.info(msg)
    ## loop over planets
    print_planets(hof[0])
    msg = yellow('RESULT: ') + 'Best fitness value: %s\n' % (hof[0].fitness)
    clogger.info(msg)

    if just_gen:
        # save fit in the system and return, no need for LM
        system.fit['params'] = hof[0]
        # a Fitness object cannot be divided; its single objective value
        # is the first item of .values
        system.fit['chi2'] = hof[0].fitness.values[0]/dof
        return

    msg = blue('INFO: ') + 'Calling LM to improve result...'
    clogger.info(msg)

    ## call levenberg markardt fit
    lm = do_lm(system, [hof[0][j::6] for j in range(6)])
    lm_par = lm[0]

    ## loop over planets
    msg = yellow('RESULT: ') + 'Best fit is'
    clogger.info(msg)
    print_planets(lm_par)

    chi2 = chi2_n(lm_par)[0]
    msg = yellow('RESULT: ') + 'Best fitness value: %f, %f' % (chi2, chi2/dof)
    clogger.info(msg)

    # save fit in the system
    system.fit['params'] = lm_par
    system.fit['chi2'] = chi2

    return
def read_rv(*filenames, **kwargs):
    """ Read one or more files containing radial velocity measurements.

    Parameters
    ----------
    filenames: string
        One or more files to read.

    Optional kwargs
    ---------------
    verbose: verbosity toggle (also selects the logging level)
    skip: number of lines to skip in the files' headers (default 0)
    format: 'drs35' (default), 'drs34', 'coralie' or 'basic'

    Returns
    -------
    t: array
        Times of observations.
    rv: array
        Radial velocity values.
    err: array
        Error in the radial velocity.
    dic: dict
        Dictionary with name and number of values from each file.
    others: tuple
        The remaining columns of the files (empty for the 'basic' format).

    Raises
    ------
    IOError
        If one of the files does not exist or is not readable.
    ValueError
        If the format is not one of the known ones.
    """
    verbose = bool(kwargs.get('verbose'))

    # set logging level
    clogger.setLevel(logging.VERBOSE if verbose else logging.INFO)

    # how many header lines to skip? (none, unless told otherwise)
    header_skip = int(kwargs.get('skip') or 0)

    # format of file
    file_format = kwargs.get('format')
    if file_format is None:
        file_format = 'drs35'

    sorted_names = sorted(filenames)

    dic = {}  # will hold how many values per file
    for filename in sorted_names:
        if os.path.isfile(filename) and os.access(filename, os.R_OK):
            # this file exists and is readable
            with rvfile(filename) as f:
                nlines = len(f.readuncommented())
            dic[filename] = [nlines, 0]
            clogger.info('Reading %d values from file %s' % (nlines, filename))
        else:
            # should raise an error or read from the other files?
            raise IOError("The file '%s' doesn't seem to exist" % filename)

    # black magic to build input from file list while skipping headers
    finput = [FileInput(f) for f in sorted_names]
    try:
        files = chain(*[islice(f, header_skip, None) for f in finput])

        # read data
        if file_format == 'drs35':  # default
            t, rv, err, \
            fwhm, contrast, bis_span, noise, s_mw, sig_s, \
            rhk, sig_rhk, sn_CaII, sn10, sn50, sn60 = loadtxt(files, unpack=True)
            others = (fwhm, contrast, bis_span, noise, s_mw, sig_s,
                      rhk, sig_rhk, sn_CaII, sn10, sn50, sn60)
        elif file_format in ('drs34', 'coralie'):
            # all ten columns are needed here; restricting the read to the
            # first three made this unpacking fail
            t, rv, err, \
            fwhm, contrast, bis_span, noise, sn10, sn50, sn60 = loadtxt(files, unpack=True)
            others = (fwhm, contrast, bis_span, noise, sn10, sn50, sn60)
        elif file_format == 'basic':
            t, rv, err = loadtxt(files, unpack=True, usecols=(0,1,2))
            others = ()
        else:
            raise ValueError("Unknown file format '%s'" % file_format)
    finally:
        # loadtxt has consumed the input by now; do not leave files open
        for f in finput:
            f.close()

    # verbose stats about data
    if verbose:
        info = blue('INFO: ')
        sinfo = blue(' : ')
        tspan = max(t) - min(t)
        rvspan = max(rv) - min(rv)
        stats = '\n'
        stats += info + "Timespan : %f days = %f years --- %fJD, %fJD\n" % (tspan, day2year(tspan), max(t), min(t))
        stats += sinfo + "RV span : %f km/s = %f m/s\n" % (rvspan, rvspan*1e3)
        stats += sinfo + "RV rms [m/s] : %f\n\n" % rms(rv)
        stats += sinfo + "{:14s} : {:10.3f}\n".format('<RV> [km/s]', mean(rv))
        if file_format in ('drs35', 'drs34', 'coralie'):
            stats += sinfo + "{:14s} : {:10.3f}\n".format('<fwhm> [km/s]', mean(others[0]))
            stats += sinfo + "{:14s} : {:10.3f}\n".format('<contrast>', mean(others[1]))
            stats += sinfo + "{:14s} : {:10.3f}\n".format('<BIS> [km/s]', mean(others[2]))
            if file_format == 'drs35':
                stats += sinfo + "{:14s} : {:10.3f}\n".format('<S_index> [MW]', mean(others[4]))
                stats += sinfo + "{:14s} : {:10.3f}\n".format('<log(rhk)>', mean(others[6]))
        clogger.verbose(stats)

    return t, rv, err, dic, others
def read_rv(*filenames, **kwargs):
    """ Read one or more files containing radial velocity measurements.

    Parameters
    ----------
    filenames: string
        One or more files to read

    Optional kwargs
    ---------------
    skip: number of lines to skip in the files' headers (default 0)
    verbose: verbosity toggle (default True)

    Returns
    -------
    data: dict
        Data in the files with keys the column names
    dic: dict
        Dictionary with name of file and number of values from each file

    Raises
    ------
    IOError
        If one of the files does not exist or is not readable.
    """
    # verbosity
    verbose = kwargs.get('verbose', True)
    # how many header lines to skip? (none, unless told otherwise)
    header_skip = int(kwargs.get('skip') or 0)

    sorted_names = sorted(filenames)

    dic = {}  # will hold how many values per file
    for filename in sorted_names:
        if os.path.isfile(filename) and os.access(filename, os.R_OK):
            # this file exists and is readable
            with rvfile(filename) as f:
                nlines = len(f.readuncommented())
            dic[filename] = [nlines, 0]
            if verbose:
                msg = blue('INFO:') + ' Reading %d values from file %s' % (nlines, filename)
                clogger.info(msg)
        else:
            # should raise an error or read from the other files?
            raise IOError("The file '%s' doesn't seem to exist" % filename)

    # black magic to build input from file list while skipping headers
    if header_skip > 0:
        # the first file's header is needed for genfromtxt names
        with open(filenames[0]) as f:
            header = StringIO.StringIO(f.readline())
    else:
        # assume only 3 columns if no column names present
        header = StringIO.StringIO('jdb vrad svrad')

    finput = [FileInput(f) for f in sorted_names]  # one input per file
    try:
        # remove each file's first header_skip lines
        iterables = [islice(f, header_skip, None) for f in finput]
        iterables.insert(0, header)  # insert header back again
        files = chain(*iterables)  # chains everything in one iterable

        data = genfromtxt(files, unpack=True, names=True)  # this returns a structured array
    finally:
        # genfromtxt has consumed the input by now; do not leave files open
        for f in finput:
            f.close()

    # cast the structured array into a dictionary keyed by column name
    data = {field: notnan(data[field]) for field in data.dtype.names}

    return data, dic
def _output(self, verbose=False):
    """ Some statistical output.

    Prints a summary for the highest peak of the periodogram. The
    uncertainties of the best frequency and period come from the curvature
    of the peak, obtained from the three points around it; when the peak
    is too close to either end of the frequency grid they are reported as
    nan.

    Parameters
    ----------
    verbose: bool
        If True print the full report, otherwise a short summary.
    """
    from shell_colors import blue
    # Index with maximum power
    bbin = argmax(self.power)
    # Maximum power
    pmax = self._upow[bbin]

    rms = sqrt(self._YY * (1.-pmax))

    # needed by every branch below, so it cannot live inside the
    # curvature estimate
    nt = float(self.N)
    fbest = self.freq[bbin]

    # Get the curvature in the power peak by fitting a parabola y=aa*x^2
    if (bbin > 1) and (bbin < len(self.freq)-2):
        # Shift the parabola origin to power peak
        xh = (self.freq[bbin-1:bbin+2] - self.freq[bbin])**2
        yh = self._upow[bbin-1:bbin+2] - self._upow[bbin]
        # Calculate the curvature (final equation from least square)
        aa = sum(yh*xh)/sum(xh*xh)
        f_err = sqrt(-2./nt * pmax/aa*(1.-pmax)/pmax)
        Psin_err = f_err / fbest**2
    else:
        # nan instead of None, so that the numeric formats below still work
        f_err = Psin_err = float('nan')

    amp = sqrt(self._a[bbin]**2 + self._b[bbin]**2)
    ph = arctan2(self._a[bbin], self._b[bbin]) / (2.*pi)
    T0 = min(self.th) - ph/fbest
    # Re-add the mean
    offset = self._off[bbin] + self._Y

    # Statistics
    print("Generalized LS - statistical output")
    print(33*"-")
    if verbose:
        print("Number of input points: %6d" % (nt))
        print("Weighted mean of dataset: % e" % (self._Y))
        print("Weighted rms of dataset: % e" % (sqrt(self._YY)))
        print("Time base: % e" % (max(self.th) - min(self.th)))
        print("Number of frequency points: %6d" % (len(self.freq)))
        print('')
        print("Maximum power, p : % e " % (self.power[bbin]))
        print("Maximum power (without normalization): %e" % (pmax))
        print("%s %s" % ("Normalization : ", self.norm))
        print("RMS of residuals : % e " % (rms))
        if self.error is not None:
            print(" Mean weighted internal error: % e" %(sqrt(nt/sum(1./self.error**2))))
        print("Best sine frequency : % e +/- % e" % (fbest, f_err))
        print("Best sine period : % e +/- % e" % (1./fbest, Psin_err))
        print("Amplitude: : % e +/- % e" % (amp, sqrt(2./nt)*rms))
        print("Phase (ph) : % e +/- % e" % (ph, sqrt(2./nt)*rms/amp/(2.*pi)))
        print("Phase (T0) : % e +/- % e" % (T0, sqrt(2./nt)*rms/amp/(2.*pi)/fbest))
        print("Offset : % e +/- % e" % (offset, sqrt(1./nt)*rms))
        print(60*"-")
    else:
        print("Input points: %6d, frequency points: %6d" % (nt, len(self.freq)))
        print('')
        print("Maximum power : %f " % (self.power[bbin]))
        print(blue("Best sine period") + ": %f +/- %f" % (1./fbest, Psin_err))