def test_obsnum_increment(self):
    """
    Check that observations spaced one integration apart receive
    consecutive obsnums, and that the obsnum returned when adding an
    observation agrees with jdpol2obsnum for the fixture's jd/pol/length.
    """
    step = self.length
    julian_dates = n.arange(0, 10) * step + self.jd
    obsnums = [jdpol2obsnum(jd, self.pol, step) for jd in julian_dates]
    # Successive time steps must map onto successive obsnums.
    for gap in n.diff(obsnums):
        self.assertEqual(gap, 1)
    added = self.add_an_obs()
    added_as_float = float(added)
    self.assertEqual(added_as_float,
                     jdpol2obsnum(self.jd, self.pol, self.length))
def test_list_observations(self):
    """
    Add ten time steps for each of four polarizations and check that
    list_observations reports the same obsnums that add_observations
    returned.
    """
    julian_dates = n.arange(0, 10) * self.length + 2456446.1234
    last_index = len(julian_dates) - 1
    records = []
    for pol in ['xx', 'yy', 'xy', 'yx']:
        for idx, jd in enumerate(julian_dates):
            obsnum = jdpol2obsnum(idx, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.dbi.delete_obs(str(obsnum))
            record = {
                'obsnum': str(obsnum),
                'outputhost': "UNITTEST",
                'date': str(jd),
                'date_type': self.date_type,
                'pol': pol,
                'host': self.host,
                'filename': self.filename,
                'length': self.length,
                'status': 'UV_POT',
            }
            if idx != 0:
                record['neighbor_low'] = julian_dates[idx - 1]
            if idx < last_index:
                record['neighbor_high'] = julian_dates[idx + 1]
            records.append(record)
    added = self.dbi.add_observations(records)
    # obsnum is stored as VarChar(100) so that other kinds of obsid fit;
    # convert back to float before doing arithmetic on the values.
    added_floats = [float(obs) for obs in added]
    listed_floats = [float(obs) for obs in self.dbi.list_observations()]
    difference = n.array(listed_floats) - n.array(added_floats)
    self.assertEqual(n.sum(difference), 0)
def Add_Fake_Observations(self, nobs, npols):
    """
    Insert fake observations: nobs consecutive time steps for each of the
    first npols polarizations out of xx, yy, xy, yx.

    Any existing observation with the same obsnum is deleted first. Every
    record is created with self.defaultstatus and linked to its time
    neighbors through 'neighbor_low' / 'neighbor_high'.
    """
    julian_dates = n.arange(0, nobs) * self.length + 2456446.1234
    last_index = len(julian_dates) - 1
    records = []
    for pol_index, pol in enumerate(['xx', 'yy', 'xy', 'yx']):
        if pol_index >= npols:
            # Every later index is also >= npols, so nothing more to add.
            break
        for idx, jd in enumerate(julian_dates):
            obsnum = jdpol2obsnum(idx, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.delete_obs(str(obsnum))
            record = {
                'obsnum': str(obsnum),
                'outputhost': "UNITTEST",
                'date': str(jd),
                'date_type': self.date_type,
                'pol': pol,
                'host': self.host,
                'filename': 'zen.{jd}.uv'.format(jd=n.round(jd, 5)),
                'length': self.length,
                'status': self.defaultstatus,
            }
            if idx != 0:
                record['neighbor_low'] = julian_dates[idx - 1]
            if idx < last_index:
                record['neighbor_high'] = julian_dates[idx + 1]
            records.append(record)
    self.add_observations(records, status=self.defaultstatus)
def Add_Fake_Observations(self, nobs, npols):
    """
    Populate the database with synthetic observations.

    nobs time steps are generated for each of the first npols entries of
    the polarization list (xx, yy, xy, yx). An existing observation with
    the same obsnum is deleted before its replacement is queued, and the
    whole batch is added with self.defaultstatus.
    """
    polarizations = ["xx", "yy", "xy", "yx"]
    julian_dates = n.arange(0, nobs) * self.length + 2456446.1234
    final_index = len(julian_dates) - 1
    batch = []
    for position, pol in enumerate(polarizations):
        if position >= npols:
            # All remaining positions are past npols as well.
            break
        for step, jd in enumerate(julian_dates):
            obsnum = jdpol2obsnum(step, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.delete_obs(str(obsnum))
            entry = dict(
                obsnum=str(obsnum),
                outputhost="UNITTEST",
                date=str(jd),
                date_type=self.date_type,
                pol=pol,
                host=self.host,
                filename="zen.{jd}.uv".format(jd=n.round(jd, 5)),
                length=self.length,
                status=self.defaultstatus,
            )
            if step != 0:
                entry["neighbor_low"] = julian_dates[step - 1]
            if step < final_index:
                entry["neighbor_high"] = julian_dates[step + 1]
            batch.append(entry)
    self.add_observations(batch, status=self.defaultstatus)
def test_add_observation(self):
    """
    Create a single record through the dbi wrapper and read it back.

    Covers the same ground as test_Observation_and_file, but goes through
    the dbi instead of the ORM classes directly.
    """
    obsnum = self.add_an_obs()
    query = self.session.query(Observation)
    stored = query.filter(Observation.obsnum == obsnum).one()
    self.assertEqual(float(stored.date), self.jd)
    stored_obsnum = float(stored.obsnum)
    self.assertEqual(stored_obsnum,
                     jdpol2obsnum(self.jd, self.pol, self.length))
def test_add_observation(self):
    """
    Add one observation via the dbi and verify the stored row.

    Equivalent in intent to test_Observation_and_file, except that the
    record is created with the dbi wrapper.
    """
    new_obsnum = self.add_an_obs()
    matching = self.session.query(Observation).filter(
        Observation.obsnum == new_obsnum)
    row = matching.one()
    # The stored date and obsnum must match the fixture values.
    self.assertEqual(float(row.date), self.jd)
    self.assertEqual(float(row.obsnum),
                     jdpol2obsnum(self.jd, self.pol, self.length))
def setUp(self):
    """
    Connect to the PostgreSQL database named "test" on localhost, open a
    session, and define the reference observation used by the tests.
    """
    connection = dict(dbhost="localhost",
                      dbport="5432",
                      dbtype="postgresql",
                      dbname="test",
                      dbuser="******",
                      dbpasswd="testme")
    self.dbi = DataBaseInterface(**connection)
    self.session = self.dbi.Session()

    # Reference observation shared by the test methods.
    jd = 2456892.20012000
    pol = 'xx'
    length = 10 / 60. / 24
    self.jd = jd
    self.pol = pol
    self.filename = '/data0/zen.2456785.123456.uv'
    self.host = 'pot0'
    self.length = length
    self.date_type = 'julian'
    self.obsnum = jdpol2obsnum(jd, pol, length)
def setUp(self):
    """
    Open a session on the local PostgreSQL "test" database and set the
    attributes describing the reference observation.
    """
    db_settings = {
        'dbhost': "localhost",
        'dbport': "5432",
        'dbtype': "postgresql",
        'dbname': "test",
        'dbuser': "******",
        'dbpasswd': "testme",
    }
    self.dbi = DataBaseInterface(**db_settings)
    self.session = self.dbi.Session()

    # Attributes of the single observation the tests add and look up.
    self.jd = 2456892.20012000
    self.pol = 'xx'
    self.length = 10 / 60. / 24
    self.date_type = 'julian'
    self.host = 'pot0'
    self.filename = '/data0/zen.2456785.123456.uv'
    self.obsnum = jdpol2obsnum(self.jd, self.pol, self.length)
def test_add_observations(self):
    """
    Bulk-add ten time steps for four polarizations, then check the row
    count and that each stored observation got the neighbors it was
    submitted with.
    """
    julian_dates = n.arange(0, 10) * self.length + 2456446.1234
    last_index = len(julian_dates) - 1
    records = []
    for pol in ['xx', 'yy', 'xy', 'yx']:
        for idx, jd in enumerate(julian_dates):
            obsnum = jdpol2obsnum(idx, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.dbi.delete_obs(str(obsnum))
            record = {
                'obsnum': str(obsnum),
                'outputhost': "UNITTEST",
                'date': str(jd),
                'date_type': self.date_type,
                'pol': pol,
                'host': self.host,
                'filename': self.filename,
                'length': self.length,
                'status': 'UV_POT',
            }
            if idx != 0:
                record['neighbor_low'] = julian_dates[idx - 1]
            if idx < last_index:
                record['neighbor_high'] = julian_dates[idx + 1]
            records.append(record)

    obsnums = self.dbi.add_observations(records, status='UV_POT')

    # Did we add observations?
    # NOTE(review): the "+ 1" presumably accounts for one observation
    # already present in the database -- confirm.
    row_count = self.session.query(func.count(Observation.obsnum)).scalar()
    self.assertEqual(len(records) + 1, int(row_count))

    # Did they get the proper neighbor assignments?
    for obsnum in obsnums:
        stored = self.session.query(Observation).filter(
            Observation.obsnum == obsnum).one()
        # Find the first submitted record with the same date and compare
        # its neighbors against the stored ones.
        for submitted in records:
            if float(submitted['date']) != float(stored.date):
                continue
            if 'neighbor_low' in submitted:
                self.assertEqual(
                    float(stored.low_neighbors[0].date),
                    round(float(submitted['neighbor_low']), 5))
            if 'neighbor_high' in submitted:
                self.assertEqual(
                    float(stored.high_neighbors[0].date),
                    round(float(submitted['neighbor_high']), 5))
            break
def test_get_neighbors(self):
    """
    Add ten time steps for each of four polarizations, then check that
    get_neighbors returns the low and high neighbor of a mid-list obsnum.
    """
    # Form up the observation list.
    obslist = []
    jds = n.arange(0, 10) * self.length + 2456446.1234
    last_index = len(jds) - 1
    for pol in ['xx', 'yy', 'xy', 'yx']:
        for jdi, jd in enumerate(jds):
            obsnum = jdpol2obsnum(jdi, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.dbi.delete_obs(str(obsnum))
            obslist.append({
                'obsnum': str(obsnum),
                'outputhost': "UNITTEST",
                'date': str(jd),
                'date_type': self.date_type,
                'pol': pol,
                'host': self.host,
                'filename': self.filename,
                'length': self.length,
                'status': 'UV_POT'
            })
            if jdi != 0:
                obslist[-1]['neighbor_low'] = jds[jdi - 1]
            if jdi != last_index:
                obslist[-1]['neighbor_high'] = jds[jdi + 1]
    obsnums = self.dbi.add_observations(obslist)
    obsnums.sort()

    # There are ten time stamps, so an entry at index 5 of the sorted
    # list should have a neighbor on both sides.
    middle = 5
    neighbors = self.dbi.get_neighbors(obsnums[middle])
    self.assertEqual(len(neighbors), 2)
    self.assertEqual(neighbors[0], obsnums[middle - 1])  # low
    self.assertEqual(neighbors[1], obsnums[middle + 1])  # high
def test_list_observations(self):
    """
    Insert a batch of observations (ten time steps, four polarizations)
    and confirm that list_observations agrees with the obsnums returned
    by add_observations.
    """
    polarizations = ['xx', 'yy', 'xy', 'yx']
    julian_dates = n.arange(0, 10) * self.length + 2456446.1234
    final_index = len(julian_dates) - 1
    batch = []
    for pol in polarizations:
        for step, jd in enumerate(julian_dates):
            obsnum = jdpol2obsnum(step, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.dbi.delete_obs(str(obsnum))
            entry = dict(
                obsnum=str(obsnum),
                outputhost="UNITTEST",
                date=str(jd),
                date_type=self.date_type,
                pol=pol,
                host=self.host,
                filename=self.filename,
                length=self.length,
                status='UV_POT',
            )
            if step != 0:
                entry['neighbor_low'] = julian_dates[step - 1]
            if step < final_index:
                entry['neighbor_high'] = julian_dates[step + 1]
            batch.append(entry)
    returned = self.dbi.add_observations(batch)
    # obsnum is a VarChar(100) column (to allow other obsid formats), so
    # the values are converted back to float for the comparison.
    returned_floats = [float(value) for value in returned]
    listed = self.dbi.list_observations()
    listed_floats = [float(value) for value in listed]
    self.assertEqual(
        n.sum(n.array(listed_floats) - n.array(returned_floats)), 0)
def test_get_neighbors(self):
    """
    Populate the database with ten time steps per polarization and verify
    that get_neighbors reports exactly the previous and next obsnum for
    an observation taken from the middle of the sorted obsnum list.
    """
    jds = n.arange(0, 10) * self.length + 2456446.1234
    final_index = len(jds) - 1
    obslist = []
    for pol in ['xx', 'yy', 'xy', 'yx']:
        for jdi, jd in enumerate(jds):
            obsnum = jdpol2obsnum(jdi, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.dbi.delete_obs(str(obsnum))
            obsinfo = {'obsnum': str(obsnum),
                       'outputhost': "UNITTEST",
                       'date': str(jd),
                       'date_type': self.date_type,
                       'pol': pol,
                       'host': self.host,
                       'filename': self.filename,
                       'length': self.length,
                       'status': 'UV_POT'}
            if jdi != 0:
                obsinfo['neighbor_low'] = jds[jdi - 1]
            if jdi != final_index:
                obsinfo['neighbor_high'] = jds[jdi + 1]
            obslist.append(obsinfo)
    obsnums = self.dbi.add_observations(obslist)
    obsnums.sort()

    # Ten time stamps were added, so index 5 of the sorted list should
    # have a neighbor on each side.
    test_index = 5
    mytestobsnum = obsnums[test_index]
    neighbors = self.dbi.get_neighbors(mytestobsnum)
    self.assertEqual(len(neighbors), 2)
    self.assertEqual(neighbors[0], obsnums[test_index - 1])  # low
    self.assertEqual(neighbors[1], obsnums[test_index + 1])  # high
def test_add_observations(self):
    """
    Exercise add_observations with a 4-polarization x 10-time-step batch.

    Verifies the resulting observation count and that the low/high
    neighbor dates stored for each returned obsnum match what was
    submitted.
    """
    polarizations = ['xx', 'yy', 'xy', 'yx']
    julian_dates = n.arange(0, 10) * self.length + 2456446.1234
    final_index = len(julian_dates) - 1
    batch = []
    for pol in polarizations:
        for step, jd in enumerate(julian_dates):
            obsnum = jdpol2obsnum(step, pol, self.length)
            # Remove the observation if it already exists before adding a new one.
            self.dbi.delete_obs(str(obsnum))
            entry = dict(
                obsnum=str(obsnum),
                outputhost="UNITTEST",
                date=str(jd),
                date_type=self.date_type,
                pol=pol,
                host=self.host,
                filename=self.filename,
                length=self.length,
                status='UV_POT',
            )
            if step != 0:
                entry['neighbor_low'] = julian_dates[step - 1]
            if step < final_index:
                entry['neighbor_high'] = julian_dates[step + 1]
            batch.append(entry)

    obsnums = self.dbi.add_observations(batch, status='UV_POT')

    # Did we add observations?
    # NOTE(review): the extra 1 presumably covers an observation that is
    # already in the database before this test runs -- confirm.
    count_query = self.session.query(func.count(Observation.obsnum))
    nobs = count_query.scalar()
    self.assertEqual(len(batch) + 1, int(nobs))

    # Did they get the proper neighbor assignments?
    for obsnum in obsnums:
        lookup = self.session.query(Observation)
        stored = lookup.filter(Observation.obsnum == obsnum).one()
        # Locate the first submitted entry with the same date; only that
        # entry is compared against the stored neighbors.
        for entry in batch:
            if float(entry['date']) != float(stored.date):
                continue
            if 'neighbor_low' in entry:
                expected_low = round(float(entry['neighbor_low']), 5)
                self.assertEqual(float(stored.low_neighbors[0].date),
                                 expected_low)
            if 'neighbor_high' in entry:
                expected_high = round(float(entry['neighbor_high']), 5)
                self.assertEqual(float(stored.high_neighbors[0].date),
                                 expected_high)
            break
print(" %s") % night for night in nights: print("adding night %s") % night obsinfo = [] nightfiles = [ filename for filename in args if int(float(file2jd(filename))) == night ] print len(nightfiles) for pol in pols: # filter off all pols but the one I'm currently working on files = sorted( [filename for filename in nightfiles if file2pol(filename) == pol]) for i, filename in enumerate(files): obsnum = str( jdpol2obsnum(float(file2jd(filename)), file2pol(filename), djd)) try: dbi.get_obs(obsnum) if opts.overwrite: raise (Exception) print filename, "found in db, skipping" except: obsinfo.append({ 'obsnum': obsnum, 'date': float(file2jd(filename)), 'date_type': "julian", 'pol': file2pol(filename), 'host': socket.gethostname(), 'filename': filename, 'outputhost': '', 'status': '',
def main (args):
    """Load files the Librarian has not yet marked as processed into the still database.

    Steps, in order: parse the command line, open the database interface,
    ask the Librarian for candidate files, derive jd / pol / djd / obsnum
    and neighbor information for each file, drop files whose obsnum is
    already in the database, and add the remainder with `initial_status`.

    Parameters:
        args: list of command-line arguments handed to optparse.

    Returns:
        None. Returns early when the Librarian reports no files or when
        nothing is left to add.

    Exits with status 1 if the Librarian RPC fails or its response cannot
    be used. If adding the observations fails, the attempted records are
    printed to stderr and the exception is re-raised.
    """
    o = optparse.OptionParser ()
    o.set_usage ('load_observations_librarian.py')
    o.set_description (__doc__)
    o.add_option('--connection', help='the name of the Librarian connection to use (specified in .hl_client.cfg)')
    opts, args = o.parse_args (args)

    # Some boilerplate to set up the database interface ...
    spawner = SpawnerClass()
    workflow = WorkFlow()
    spawner.config_file = os.path.join (basedir, 'etc/still.cfg')
    process_client_config_file (spawner, workflow)
    dbi = get_dbi_from_config (spawner.config_file)
    dbi.test_db ()

    # Get the list of potentially-relevant files from the Librarian.
    lc = hera_librarian.LibrarianClient (opts.connection)
    try:
        listing = lc.list_files_without_history_item (librarian_source, rtp_processed_key)
    except hera_librarian.RPCFailedError as e:
        print ('RPC to librarian failed: %s' % e.message)
        sys.exit (1)

    # Probe the response: it must have a 'files' entry that supports len().
    try:
        files = listing['files']
        len (files)
    except Exception as e:
        print ('unexpected response from librarian: %s' % e)
        sys.exit (1)

    if not len (files):
        print ('No new files.')
        return

    # For each file we should have a dict of at least:
    #
    # name -- something like 2456892/zen.2456892.49664.xx.uv
    # obsid -- the obsid associated with this file
    # create_time -- the Unix timestamp that the file was sent to the Librarian
    # size -- file size in bytes
    # type -- file "type" stored in the Librarian; "uv" for UV data
    # md5 -- the MD5 of the file contents; XXX may be calculated weirdly by Librarian
    # store_ssh_prefix -- the Librarian "ssh_prefix" of the file's storage location
    # store_path_prefix -- the Librarian "path_prefix" of the file's storage location
    #
    # We start by extracting a few useful pieces of meta-information:
    for f in files:
        f['jd'] = float (path_to_jd (f['name']))
        f['pol'] = path_to_pol (f['name'])

        # Extract the hostname of the store on which this file is stored from
        # its store's ssh_prefix. The prefix will look like "user@host",
        # but the "user@" part might not be present.
        f['store_host'] = f['store_ssh_prefix'].split ('@', 1)[-1]

        # Meanwhile, the RTP system expects the filenames to be absolute paths.
        f['name'] = f['store_path_prefix'] + '/' + f['name']

    # If at all possible, get a default observation length from the
    # separations between observations, in case we have any funky datasets
    # without the length embedded. We are somewhat recklessly assuming that
    # even if this batch of datasets spans different nights, they all will
    # have the same DJD; this doesn't seem too unreasonable.
    pols = list (set (f['pol'] for f in files))
    bestjds = []

    # Keep the sorted JD list of whichever polarization has the most files.
    for pol in pols:
        jds = np.sort ([f['jd'] for f in files if f['pol'] == pol])
        if len (jds) > len (bestjds):
            bestjds = jds

    default_djd = None

    if len (bestjds) > 2:
        default_djd = np.median (np.diff (bestjds))
        print ('Inferring default djd = %.5f days' % default_djd)

    for f in files:
        f['djd'] = default_djd

    # But let's get djd straight from the data if at all possible. If there
    # are any files for which we have no idea about the djd, we can't add
    # them. For everything else, now we can compute the 'obsnum' magic number
    # (which is not the same as obsid!).
    for f in files:
        djd = try_get_file_djd (f)
        if djd is not None:
            f['djd'] = djd

    files = [f for f in files if f.get ('djd') is not None]

    for f in files:
        f['obsnum'] = str (jdpol2obsnum (f['jd'], f['pol'], f['djd']))

    # Now let's fill in the "neighbor" information. XXX: if we only get, say,
    # a random subset of observations from one night, this information will be
    # grievously incomplete! I don't see a way around that given the way that
    # this aspect of things is handled at the moment.
    for pol in pols:
        sfiles = sorted ((f for f in files if f['pol'] == pol), key=lambda f: f['jd'])

        for i in xrange (len (sfiles)):
            f_this = sfiles[i]

            # Adjacent files only count as neighbors when they are less
            # than 1.2 * djd apart in JD.
            if i > 0:
                f_prev = sfiles[i - 1]
                if (f_this['jd'] - f_prev['jd']) < (1.2 * f_this['djd']):
                    f_this['neighbor_low'] = f_prev['jd']

            if i < len (sfiles) - 1:
                f_next = sfiles[i + 1]
                if (f_next['jd'] - f_this['jd']) < (1.2 * f_this['djd']):
                    f_this['neighbor_high'] = f_next['jd']

    # Now that we've computed everything, avoid duplicating files that we
    # already know about.
    from sqlalchemy.orm.exc import NoResultFound

    def not_already_seen (filerec):
        # True when dbi.get_obs raises NoResultFound for this obsnum, i.e.
        # the observation is not in the database yet. A found observation
        # is printed and the file is rejected.
        try:
            obs = dbi.get_obs (filerec['obsnum'])
            print (repr (obs))
            return False
        except NoResultFound:
            return True

    n_before = len (files)
    files = [f for f in files if not_already_seen (f)]
    if len (files) != n_before:
        print ('Dropping %d already-ingested files.' % (n_before - len (files)))

    if not len (files):
        print ('Nothing to add.')
        return

    # Let's go for it.
    try:
        print ('Attempting to add %d observations to the still ...' % len (files))
        dbi.add_observations ([augmented_file_to_obsinfo (f) for f in files], initial_status)
    except Exception as e:
        # Dump every record we tried to add, then let the error propagate.
        print ('addition failed! here\'s what was attempted:', file=sys.stderr)
        for f in files:
            print ('', file=sys.stderr)
            print (augmented_file_to_obsinfo (f), file=sys.stderr)
        raise
print(" %s") % pol print("found the following nights:") for night in nights: print(" %s") % night for night in nights: print("adding night %s") % night obsinfo = [] nightfiles = [filename for filename in args if int(float(file2jd(filename))) == night] print len(nightfiles) for pol in pols: files = [filename for filename in nightfiles if file2pol(filename) == pol] # filter off all pols but the one I'm currently working on files.sort() for i, filename in enumerate(files): obsnum = str(jdpol2obsnum(float(file2jd(filename)), file2pol(filename), djd)) try: dbi.get_obs(obsnum) if opts.overwrite: raise(StandardError) print filename, "found in db, skipping" except: obsinfo.append({ 'obsnum': obsnum, 'date': float(file2jd(filename)), 'date_type': "julian", 'pol': file2pol(filename), 'host': socket.gethostname(), 'filename': filename, 'outputhost': '', 'status': '',