def __init__(self, arrays=None):
    '''
    Initialize the three encoded data streams.

    If `arrays` is None, each stream defaults to an empty string.
    Otherwise `arrays` must be a `(time, flux, fluxerr)` triple of NumPy
    arrays; each component is serialized with `encode_array` into its
    corresponding stream.

    (Fixed: the old docstring described a `fits_stream` parameter that
    does not exist; the actual parameter is `arrays`.)
    '''
    if arrays is None:
        self.dstream1 = ''
        self.dstream2 = ''
        self.dstream3 = ''
    else:
        time, flux, fluxerr = arrays
        self.dstream1 = encode_array(time)
        self.dstream2 = encode_array(flux)
        self.dstream3 = encode_array(fluxerr)
def idxmerge(cdbname, idxstomerge, verbose=0):
    '''
    Merge the index objects in `idxstomerge` into a single new cdb index
    file named `cdbname`.

    Every document receives a fresh, globally unique id, term posting
    lists are concatenated in index-priority order, and the docid<->loc
    mappings plus a summary record are rewritten at the end.  `verbose`
    enables progress output on stderr.  Returns None.
    '''
    # Count all the unique locations and assign new document ids.
    # Indexes are visited in reverse so later entries in `idxstomerge`
    # get lower priority numbers -- NOTE(review): presumably so newer
    # indexes win on duplicate locations; confirm against callers.
    idxorder = {}
    loc2docid = {}
    for (i,idx) in enumerate(reversed(idxstomerge)):
        idx.assignnewids1(loc2docid)
        idxorder[idx] = i
    n = len(loc2docid)
    # Flip ids (docid -> n-docid) so final ids ascend in original order.
    loc2docid = dict( (loc,n-docid) for (loc,docid) in loc2docid.iteritems() )
    for idx in idxstomerge:
        idx.assignnewids2(loc2docid)
    # Create a new index file (built via a temp file, then atomically
    # renamed by cdbmake on finish()).
    maker = cdb.cdbmake(cdbname, cdbname+'.tmp')
    if verbose:
        print >>sys.stderr, 'Merging: %r (docs=%d, est. terms=%d): %r' % \
            (cdbname, sum( idx.ndocs for idx in idxstomerge ),
             estimate_terms( idx.nterms for idx in idxstomerge ),
             idxstomerge)
    # Copy sentences to a new index file with unique ids.
    for idx in idxstomerge:
        idx.copysents(maker)
    # Merge document ids and offsets.
    nterms = 0
    docid2info = []
    for (k,vs) in cdbmerge(idxstomerge):
        # Keys arrive in sorted order; PROP_LOC / PROP_IDXINFO records sort
        # last and are regenerated below, so stop once they appear.
        if k[0] == PROP_LOC or k[0] == PROP_IDXINFO: break
        if k[0] == PROP_DOCID:
            # read a docid->loc mapping
            (oldid,) = unpack('>xi', k)
            for (info,idx) in vs:
                if oldid not in idx.old2new: continue
                newid = idx.old2new[oldid]
                docid2info.append((newid, info))
                # info[:4] is the packed mtime, info[4:] the location
                # string (see the writer's pack('>i', mtime)+loc layout).
                assert loc2docid[info[4:]] == newid
        else:
            # merge docid+pos sets: convert each list to new ids, then
            # concatenate in priority order.
            vs = sorted(( (idxorder[idx], idx.convertoldids(v)) for (v,idx) in vs ))
            # Each posting is a (docid, pos) pair, hence the halving.
            ents = sum( len(a) for (_,a) in vs )/2
            (_,r) = vs.pop(0)
            for (_,a) in vs:
                r.extend(a)
            maker.add(k, encode_array(ents, r))
            nterms += 1
            if verbose and nterms % 1000 == 0:
                sys.stderr.write('.'); sys.stderr.flush()
    # write docid->loc mappings (avoiding dupes)
    docid2info.sort()
    for (docid,info) in docid2info:
        maker.add(pack('>ci', PROP_DOCID, docid), info)
    # write loc->docid mappings (avoiding dupes)
    for (loc,docid) in sorted(loc2docid.iteritems()):
        if loc:
            maker.add(PROP_LOC+loc, pack('>i', docid))
    if verbose:
        print >>sys.stderr, 'done: docs=%d, terms=%d' % (len(docid2info), nterms)
    # Final summary record: (number of documents, number of terms).
    maker.add(PROP_IDXINFO, pack('>ii', len(docid2info), nterms))
    maker.finish()
    return
def flush(self):
    '''
    Write all buffered documents and terms to the cdb file and reset the
    in-memory buffers.  No-op when nothing is buffered.  After flushing,
    `self.maker` is consumed (set to None); a new maker must be created
    before the next flush.
    '''
    if not self.docinfo: return
    assert self.maker != None
    t0 = time.time()
    # All keys must be lexically sorted except the last one.
    # DocID -> Document.
    self.docinfo.sort(key=lambda (docid,_): docid)
    # Term -> pos: one packed (docid, pos) pair per occurrence.
    nrefs = 0
    for w in sorted(self.terms.iterkeys()):
        occs = self.terms[w]
        occs.sort(reverse=True)
        a = array('i')
        for (docid,pos) in occs:
            a.append(docid)
            a.append(pos)
        self.maker.add(w, encode_array(len(occs), a))
        nrefs += len(occs)
    # DocID -> (mtime, location) records.  (Comment fixed: this section
    # writes PROP_DOCID keys, i.e. docid -> location, not the reverse.)
    for (docid,doc) in self.docinfo:
        self.maker.add(pack('>ci', PROP_DOCID, docid),
                       pack('>i', doc.get_mtime())+doc.loc)
    self.docinfo.sort(key=lambda (_,doc): doc.loc)
    # Location -> DocID records, sorted by location.
    for (docid,doc) in self.docinfo:
        self.maker.add(PROP_LOC+doc.loc, pack('>i', docid))
    # Summary record: (number of documents, number of terms).
    self.maker.add(PROP_IDXINFO, pack('>ii', len(self.docinfo), len(self.terms)))
    self.maker.finish()
    self.maker = None
    if self.verbose:
        t = time.time() - t0
        # NOTE(review): t can be 0.0 on a very fast flush with a coarse
        # clock, making len(self.docinfo)/t raise ZeroDivisionError --
        # confirm whether this path needs guarding.
        print >>sys.stderr, 'docs=%d, keys=%d, refs=%d, time=%.1fs(%.1fdocs/s), memory=%s' % \
            (len(self.docinfo), len(self.terms), nrefs, t,
             len(self.docinfo)/t, linux_process_memory())
    # Clear the files and terms.
    self.docinfo = []
    self.terms.clear()
    return
def idxmerge(cdbname, idxstomerge, verbose=0):
    '''
    Merge the index objects in `idxstomerge` into a single new cdb index
    file named `cdbname`.

    Documents get fresh, globally unique ids; term posting lists are
    concatenated in index-priority order; docid<->loc mappings and a
    summary record are rewritten at the end.  `verbose` enables progress
    output on stderr.  Returns None.
    '''
    # Count all the unique locations and assign new document ids.
    # Reverse iteration gives later indexes lower priority numbers --
    # NOTE(review): presumably so newer indexes win on duplicate
    # locations; confirm against callers.
    idxorder = {}
    loc2docid = {}
    for (i, idx) in enumerate(reversed(idxstomerge)):
        idx.assignnewids1(loc2docid)
        idxorder[idx] = i
    n = len(loc2docid)
    # Flip ids (docid -> n-docid) so final ids ascend in original order.
    loc2docid = dict(
        (loc, n - docid) for (loc, docid) in loc2docid.iteritems())
    for idx in idxstomerge:
        idx.assignnewids2(loc2docid)
    # Create a new index file (built via a temp file; cdbmake renames it
    # into place on finish()).
    maker = cdb.cdbmake(cdbname, cdbname + '.tmp')
    if verbose:
        print >>sys.stderr, 'Merging: %r (docs=%d, est. terms=%d): %r' % \
            (cdbname, sum( idx.ndocs for idx in idxstomerge ),
             estimate_terms( idx.nterms for idx in idxstomerge ),
             idxstomerge)
    # Copy sentences to a new index file with unique ids.
    for idx in idxstomerge:
        idx.copysents(maker)
    # Merge document ids and offsets.
    nterms = 0
    docid2info = []
    for (k, vs) in cdbmerge(idxstomerge):
        # Keys arrive sorted; PROP_LOC / PROP_IDXINFO sort last and are
        # regenerated below, so stop merging once they appear.
        if k[0] == PROP_LOC or k[0] == PROP_IDXINFO:
            break
        if k[0] == PROP_DOCID:
            # read a docid->loc mapping
            (oldid, ) = unpack('>xi', k)
            for (info, idx) in vs:
                if oldid not in idx.old2new: continue
                newid = idx.old2new[oldid]
                docid2info.append((newid, info))
                # info[:4] is the packed mtime, info[4:] the location
                # string (matches the writer's pack('>i', mtime)+loc).
                assert loc2docid[info[4:]] == newid
        else:
            # merge docid+pos sets: remap to new ids, concatenate in
            # priority order.
            vs = sorted(
                ((idxorder[idx], idx.convertoldids(v)) for (v, idx) in vs))
            # Each posting is a (docid, pos) pair, hence the halving.
            ents = sum(len(a) for (_, a) in vs) / 2
            (_, r) = vs.pop(0)
            for (_, a) in vs:
                r.extend(a)
            maker.add(k, encode_array(ents, r))
            nterms += 1
            if verbose and nterms % 1000 == 0:
                sys.stderr.write('.')
                sys.stderr.flush()
    # write docid->loc mappings (avoiding dupes)
    docid2info.sort()
    for (docid, info) in docid2info:
        maker.add(pack('>ci', PROP_DOCID, docid), info)
    # write loc->docid mappings (avoiding dupes)
    for (loc, docid) in sorted(loc2docid.iteritems()):
        if loc:
            maker.add(PROP_LOC + loc, pack('>i', docid))
    if verbose:
        print >> sys.stderr, 'done: docs=%d, terms=%d' % (len(docid2info),
                                                          nterms)
    # Final summary record: (number of documents, number of terms).
    maker.add(PROP_IDXINFO, pack('>ii', len(docid2info), nterms))
    maker.finish()
    return
def main():
    '''
    Main function for this module.  Parses all command line arguments,
    reads in data from stdin, and sends it to the proper BLS algorithm.
    '''
    # This is a global list of default values that will be used by the
    # argument parser and the configuration parser.  All values are
    # strings because ConfigParser works with string defaults.
    defaults = {
        'min_duration': '0.0416667',
        'max_duration': '0.5',
        'n_bins': '100',
        'direction': '0',
        'mode': 'vec',
        'print_format': 'encoded',
        'verbose': '0',
        'profiling': '0'
    }

    # Set up the parser for command line arguments and read them.
    parser = __init_parser(defaults)
    args = parser.parse_args()

    if not args.config:
        # No configuration file specified -- read in command line arguments.
        if not args.segment:
            parser.error(
                'No trial segment specified and no configuration file given.')
        segment = args.segment
        mindur = args.mindur
        maxdur = args.maxdur
        nbins = args.nbins
        direction = args.direction
        mode = args.mode
        fmt = args.fmt
        verbose = args.verbose
        profile = args.profile
    else:
        # Configuration file was given; read in that instead.
        cp = SafeConfigParser(defaults)
        cp.read(args.config)
        segment = cp.getfloat('DEFAULT', 'segment')
        mindur = cp.getfloat('DEFAULT', 'min_duration')
        maxdur = cp.getfloat('DEFAULT', 'max_duration')
        nbins = cp.getint('DEFAULT', 'n_bins')
        direction = cp.getint('DEFAULT', 'direction')
        mode = cp.get('DEFAULT', 'mode')
        fmt = cp.get('DEFAULT', 'print_format')
        verbose = cp.getboolean('DEFAULT', 'verbose')
        profile = cp.getboolean('DEFAULT', 'profiling')

    # Perform any sanity-checking on the arguments.
    __check_args(segment, mindur, maxdur, nbins, direction)

    # Send the data to the algorithm, one light curve per input record.
    for k, q, time, flux, fluxerr in read_mapper_output(sys.stdin):
        # Extract the array columns.
        time = np.array(time, dtype='float64')
        flux = np.array(flux, dtype='float64')
        fluxerr = np.array(fluxerr, dtype='float64')

        if profile:
            # Turn on profiling.
            pr = cProfile.Profile()
            pr.enable()

        if mode == 'python':
            # NOTE(review): this mode is disabled; the call below is
            # unreachable dead code kept for reference.
            raise NotImplementedError
            out = bls_pulse_python(time, flux, fluxerr, nbins, segment,
                mindur, maxdur, direction=direction)
        elif mode == 'vec':
            # NOTE(review): this mode is disabled; the call below is
            # unreachable dead code kept for reference.
            raise NotImplementedError
            out = bls_pulse_vec(time, flux, fluxerr, nbins, segment,
                mindur, maxdur, direction=direction)
        elif mode == 'cython':
            out = bls_pulse_cython(time, flux, fluxerr, nbins, segment,
                mindur, maxdur, direction=direction)
        else:
            raise ValueError('Invalid mode: %s' % mode)

        if profile:
            # Turn off profiling.
            pr.disable()
            ps = pstats.Stats(pr, stream=sys.stderr).sort_stats('time')
            ps.print_stats()

        if direction == 2:
            # Both dip (transit-like) and blip (inverted) statistics are
            # reported; unpack them all from the result dict.
            srsq_dip = out['srsq_dip']
            duration_dip = out['duration_dip']
            depth_dip = out['depth_dip']
            midtime_dip = out['midtime_dip']
            srsq_blip = out['srsq_blip']
            duration_blip = out['duration_blip']
            depth_blip = out['depth_blip']
            midtime_blip = out['midtime_blip']
            segstart = out['segstart']
            segend = out['segend']

            # Print output.
            if fmt == 'encoded':
                print "\t".join([ k, q, encode_array(segstart),
                    encode_array(segend), encode_array(srsq_dip),
                    encode_array(duration_dip), encode_array(depth_dip),
                    encode_array(midtime_dip), encode_array(srsq_blip),
                    encode_array(duration_blip), encode_array(depth_blip),
                    encode_array(midtime_blip) ])
            elif fmt == 'normal':
                print "-" * 120
                print "Kepler " + k
                print "Quarters: " + q
                print "-" * 120
                print '{0: <7s} {1: <13s} {2: <13s} {3: <13s} {4: <13s} {5: <13s} {6: <13s} {7: <13s} ' \
                    '{8: <13s}'.format('Segment', 'Dip SR^2', 'Dip dur.',
                    'Dip depth', 'Dip mid.', 'Blip SR^2', 'Blip dur.',
                    'Blip depth', 'Blip mid.')
                for i in xrange(len(srsq_dip)):
                    print '{0: <7d} {1: <13.6f} {2: <13.6f} {3: <13.6f} {4: <13.6f} ' \
                        '{5: <13.6f} {6: <13.6f} {7: <13.6f} {8: <13.6f}'.format(i,
                        srsq_dip[i], duration_dip[i], depth_dip[i],
                        midtime_dip[i], srsq_blip[i], duration_blip[i],
                        depth_blip[i], midtime_blip[i])
                print "-" * 120
                print
                print
        else:
            # Single-direction run: only one set of statistics.
            srsq = out['srsq']
            duration = out['duration']
            depth = out['depth']
            midtime = out['midtime']
            segstart = out['segstart']
            segend = out['segend']

            # Print output.
            if fmt == 'encoded':
                print "\t".join([ k, q, encode_array(segstart),
                    encode_array(segend), encode_array(srsq),
                    encode_array(duration), encode_array(depth),
                    encode_array(midtime) ])
            elif fmt == 'normal':
                print "-" * 80
                print "Kepler " + k
                print "Quarters: " + q
                print "-" * 80
                print '{0: <7s} {1: <13s} {2: <10s} {3: <9s} {4: <13s}'.format(
                    'Segment', 'SR^2', 'Duration', 'Depth', 'Midtime')
                for i in xrange(len(srsq)):
                    print '{0: <7d} {1: <13.6f} {2: <10.6f} {3: <9.6f} {4: <13.6f}'.format(
                        i, srsq[i], duration[i], depth[i], midtime[i])
                print "-" * 80
                print
                print
def main():
    '''
    Main function for this module.  Parses all command line arguments,
    reads in data from stdin, and sends it to the proper BLS algorithm.
    '''
    # This is a global list of default values that will be used by the
    # argument parser and the configuration parser.  All values are
    # strings because ConfigParser works with string defaults.
    defaults = {'min_duration':'0.0416667', 'max_duration':'0.5',
        'n_bins':'100', 'direction':'0', 'mode':'vec',
        'print_format':'encoded', 'verbose':'0', 'profiling':'0'}

    # Set up the parser for command line arguments and read them.
    parser = __init_parser(defaults)
    args = parser.parse_args()

    if not args.config:
        # No configuration file specified -- read in command line arguments.
        if not args.segment:
            parser.error('No trial segment specified and no configuration file given.')
        segment = args.segment
        mindur = args.mindur
        maxdur = args.maxdur
        nbins = args.nbins
        direction = args.direction
        mode = args.mode
        fmt = args.fmt
        verbose = args.verbose
        profile = args.profile
    else:
        # Configuration file was given; read in that instead.
        cp = SafeConfigParser(defaults)
        cp.read(args.config)
        segment = cp.getfloat('DEFAULT', 'segment')
        mindur = cp.getfloat('DEFAULT', 'min_duration')
        maxdur = cp.getfloat('DEFAULT', 'max_duration')
        nbins = cp.getint('DEFAULT', 'n_bins')
        direction = cp.getint('DEFAULT', 'direction')
        mode = cp.get('DEFAULT', 'mode')
        fmt = cp.get('DEFAULT', 'print_format')
        verbose = cp.getboolean('DEFAULT', 'verbose')
        profile = cp.getboolean('DEFAULT', 'profiling')

    # Perform any sanity-checking on the arguments.
    __check_args(segment, mindur, maxdur, nbins, direction)

    # Send the data to the algorithm, one light curve per input record.
    for k, q, time, flux, fluxerr in read_mapper_output(sys.stdin):
        # Extract the array columns.
        time = np.array(time, dtype='float64')
        flux = np.array(flux, dtype='float64')
        fluxerr = np.array(fluxerr, dtype='float64')

        if profile:
            # Turn on profiling.
            pr = cProfile.Profile()
            pr.enable()

        if mode == 'python':
            # NOTE(review): this mode is disabled; the call below is
            # unreachable dead code kept for reference.
            raise NotImplementedError
            out = bls_pulse_python(time, flux, fluxerr, nbins, segment, mindur,
                maxdur, direction=direction)
        elif mode == 'vec':
            # NOTE(review): this mode is disabled; the call below is
            # unreachable dead code kept for reference.
            raise NotImplementedError
            out = bls_pulse_vec(time, flux, fluxerr, nbins, segment, mindur,
                maxdur, direction=direction)
        elif mode == 'cython':
            out = bls_pulse_cython(time, flux, fluxerr, nbins, segment, mindur,
                maxdur, direction=direction)
        else:
            raise ValueError('Invalid mode: %s' % mode)

        if profile:
            # Turn off profiling.
            pr.disable()
            ps = pstats.Stats(pr, stream=sys.stderr).sort_stats('time')
            ps.print_stats()

        if direction == 2:
            # Both dip (transit-like) and blip (inverted) statistics are
            # reported; unpack them all from the result dict.
            srsq_dip = out['srsq_dip']
            duration_dip = out['duration_dip']
            depth_dip = out['depth_dip']
            midtime_dip = out['midtime_dip']
            srsq_blip = out['srsq_blip']
            duration_blip = out['duration_blip']
            depth_blip = out['depth_blip']
            midtime_blip = out['midtime_blip']
            segstart = out['segstart']
            segend = out['segend']

            # Print output.
            if fmt == 'encoded':
                print "\t".join([k, q, encode_array(segstart),
                    encode_array(segend), encode_array(srsq_dip),
                    encode_array(duration_dip), encode_array(depth_dip),
                    encode_array(midtime_dip), encode_array(srsq_blip),
                    encode_array(duration_blip), encode_array(depth_blip),
                    encode_array(midtime_blip)])
            elif fmt == 'normal':
                print "-" * 120
                print "Kepler " + k
                print "Quarters: " + q
                print "-" * 120
                print '{0: <7s} {1: <13s} {2: <13s} {3: <13s} {4: <13s} {5: <13s} {6: <13s} {7: <13s} ' \
                    '{8: <13s}'.format('Segment', 'Dip SR^2', 'Dip dur.',
                    'Dip depth', 'Dip mid.', 'Blip SR^2', 'Blip dur.',
                    'Blip depth', 'Blip mid.')
                for i in xrange(len(srsq_dip)):
                    print '{0: <7d} {1: <13.6f} {2: <13.6f} {3: <13.6f} {4: <13.6f} ' \
                        '{5: <13.6f} {6: <13.6f} {7: <13.6f} {8: <13.6f}'.format(i,
                        srsq_dip[i], duration_dip[i], depth_dip[i],
                        midtime_dip[i], srsq_blip[i], duration_blip[i],
                        depth_blip[i], midtime_blip[i])
                print "-" * 120
                print
                print
        else:
            # Single-direction run: only one set of statistics.
            srsq = out['srsq']
            duration = out['duration']
            depth = out['depth']
            midtime = out['midtime']
            segstart = out['segstart']
            segend = out['segend']

            # Print output.
            if fmt == 'encoded':
                print "\t".join([k, q, encode_array(segstart),
                    encode_array(segend), encode_array(srsq),
                    encode_array(duration), encode_array(depth),
                    encode_array(midtime)])
            elif fmt == 'normal':
                print "-" * 80
                print "Kepler " + k
                print "Quarters: " + q
                print "-" * 80
                print '{0: <7s} {1: <13s} {2: <10s} {3: <9s} {4: <13s}'.format('Segment',
                    'SR^2', 'Duration', 'Depth', 'Midtime')
                for i in xrange(len(srsq)):
                    print '{0: <7d} {1: <13.6f} {2: <10.6f} {3: <9.6f} {4: <13.6f}'.format(i,
                        srsq[i], duration[i], depth[i], midtime[i])
                print "-" * 80
                print
                print
def main(): ''' Main function for this module. Parses all command line arguments, reads in data from stdin, and sends it to the proper BLS algorithm. ''' # This is a global list of default values that will be used by the # argument parser and the configuration parser. defaults = {'min_duration':'0.0416667', 'max_duration':'0.5', 'n_bins':'100', 'direction':'0', 'print_format':'encoded', 'verbose':'0', 'profiling':'0', 'clean_max':'5', 'fits_output':'1', 'fits_dir':'', 'model_type':'box'} # Set up the parser for command line arguments and read them. parser = __init_parser(defaults) args = parser.parse_args() cfg = dict() if not args.config: # No configuration file specified -- read in command line arguments. if not args.segment: parser.error('No trial segment specified and no configuration ' 'file given.') cfg['segment'] = args.segment cfg['mindur'] = args.mindur cfg['maxdur'] = args.maxdur cfg['nbins'] = args.nbins cfg['direction'] = args.direction cfg['fmt'] = args.fmt cfg['verbose'] = args.verbose cfg['profile'] = args.profile cfg['clean_max'] = args.clean_max cfg['fitsout'] = args.fitsout cfg['fitsdir'] = args.fitsdir cfg['model'] = args.model_type else: # Configuration file was given; read it instead. 
cp = ConfigParser(defaults) cp.read(args.config) cfg['segment'] = cp.getfloat('DEFAULT', 'segment') cfg['mindur'] = cp.getfloat('DEFAULT', 'min_duration') cfg['maxdur'] = cp.getfloat('DEFAULT', 'max_duration') cfg['nbins'] = cp.getint('DEFAULT', 'n_bins') cfg['direction'] = cp.getint('DEFAULT', 'direction') cfg['fmt'] = cp.get('DEFAULT', 'print_format') cfg['verbose'] = cp.getboolean('DEFAULT', 'verbose') cfg['profile'] = cp.getboolean('DEFAULT', 'profiling') cfg['clean_max'] = cp.getint('DEFAULT', 'clean_max') cfg['fitsout'] = cp.getboolean('DEFAULT', 'fits_output') cfg['fitsdir'] = cp.get('DEFAULT', 'fits_dir') cfg['model'] = cp.get('DEFAULT', 'model_type') if cfg['fitsout'] and cfg['fitsdir'] == '': parser.error('No FITS output directory specified.') # Perform any sanity-checking on the arguments. __check_args(cfg['segment'], cfg['mindur'], cfg['maxdur'], cfg['nbins'], cfg['direction']) # Send the data to the algorithm. for k, q, time, flux, fluxerr in read_mapper_output(sys.stdin): logger.info('Beginning analysis for ' + k) # Extract the array columns. time = np.array(time, dtype='float64') flux = np.array(flux, dtype='float64') fluxerr = np.array(fluxerr, dtype='float64') # Don't assume the times are sorted already! ndx = np.argsort(time) time = time[ndx] flux = flux[ndx] fluxerr = fluxerr[ndx] if cfg['profile']: # Turn on profiling. pr = cProfile.Profile() pr.enable() if cfg['fitsout']: # Set up the FITS bundler. bundler = BLSFitsBundler() bundler.make_header(k) clean_out = None for i in xrange(cfg['clean_max']): # Do ALL detrending and binning here. The main algorithm # function is now separate from this functionality. 
dtime, dflux, dfluxerr, samples, segstart, segend = \ bin_and_detrend(time, flux, fluxerr, cfg['nbins'], cfg['segment'], detrend_order=3) if np.count_nonzero(~np.isnan(dflux)) == 0: logger.warning('Not enough points left to continue BLS pulse') bls_out = None break bls_out = bls_pulse(dtime, dflux, dfluxerr, samples, cfg['nbins'], cfg['segment'], cfg['mindur'], cfg['maxdur'], direction=cfg['direction']) if cfg['direction'] != 2: # Cleaning iterations currently won't work unless direction # is 2, so we don't loop in this case. break srsq_dip = bls_out['srsq_dip'] duration_dip = bls_out['duration_dip'] depth_dip = bls_out['depth_dip'] midtime_dip = bls_out['midtime_dip'] srsq_blip = bls_out['srsq_blip'] duration_blip = bls_out['duration_blip'] depth_blip = bls_out['depth_blip'] midtime_blip = bls_out['midtime_blip'] try: clean_out = clean_signal(time, flux, dtime, dflux, dfluxerr, bls_out, model=cfg['model']) except RuntimeError: break if cfg['fitsout']: ndx = np.where(np.isfinite(dflux)) bundler.push_detrended_lightcurve(dtime[ndx], dflux[ndx], dfluxerr[ndx], clean_out=clean_out) bundler.push_bls_output(bls_out, segstart, segend) if cfg['fitsout'] and bls_out is not None: # Save the detrended light curve and BLS output from the last # iteration. There won't be any output from `clean_signal`, # either because of the `direction` parameter or because there # are no more strong periodic signals. ndx = np.where(np.isfinite(dflux)) bundler.push_detrended_lightcurve(dtime[ndx], dflux[ndx], dfluxerr[ndx], clean_out=None) bundler.push_bls_output(bls_out, segstart, segend) if cfg['fitsout']: # Save the entire FITS file, including the configuration. bundler.push_config(cfg) outfile = os.path.abspath(os.path.expanduser(os.path.join( cfg['fitsdir'], 'KIC' + k + '.fits'))) bundler.write_file(outfile, clobber=True) if cfg['profile']: # Turn off profiling and print results to STDERR. 
pr.disable() ps = pstats.Stats(pr, stream=sys.stderr).sort_stats('time') ps.print_stats() if cfg['direction'] == 2: # Print output. if cfg['fmt'] == 'encoded': print "\t".join([k, q, encode_array(segstart), encode_array(segend), encode_array(srsq_dip), encode_array(duration_dip), encode_array(depth_dip), encode_array(midtime_dip), encode_array(srsq_blip), encode_array(duration_blip), encode_array(depth_blip), encode_array(midtime_blip)]) elif cfg['fmt'] == 'normal': print "-" * 120 print "Kepler " + k print "Quarters: " + q print "-" * 120 print '{0: <7s} {1: <13s} {2: <13s} {3: <13s} {4: <13s} ' \ '{5: <13s} {6: <13s} {7: <13s} {8: <13s}'.format('Segment', 'Dip SR^2', 'Dip dur.', 'Dip depth', 'Dip mid.', 'Blip SR^2', 'Blip dur.', 'Blip depth', 'Blip mid.') for i in xrange(len(srsq_dip)): print '{0: <7d} {1: <13.6f} {2: <13.6f} {3: <13.6f} ' \ '{4: <13.6f} {5: <13.6f} {6: <13.6f} {7: <13.6f} ' \ '{8: <13.6f}'.format(i, srsq_dip[i], duration_dip[i], depth_dip[i], midtime_dip[i], srsq_blip[i], duration_blip[i], depth_blip[i], midtime_blip[i]) print "-" * 120 print print elif cfg['fmt'] == 'outfile' and cfg['fitsout']: print outfile else: srsq = out['srsq'] duration = out['duration'] depth = out['depth'] midtime = out['midtime'] segstart = out['segstart'] segend = out['segend'] # Print output. if cfg['fmt'] == 'encoded': print "\t".join([k, q, encode_array(segstart), encode_array(segend), encode_array(srsq), encode_array(duration), encode_array(depth), encode_array(midtime)]) elif cfg['fmt'] == 'normal': print "-" * 80 print "Kepler " + k print "Quarters: " + q print "-" * 80 print '{0: <7s} {1: <13s} {2: <10s} {3: <9s} {4: <13s}'.format( 'Segment', 'SR^2', 'Duration', 'Depth', 'Midtime') for i in xrange(len(srsq)): print '{0: <7d} {1: <13.6f} {2: <10.6f} {3: <9.6f} ' \ '{4: <13.6f}'.format(i, srsq[i], duration[i], depth[i], midtime[i]) print "-" * 80 print print elif cfg['fmt'] == 'outfile' and cfg['fitsout']: print outfile