def activate(name, path, user, address, hostname):
    """Interactive routine for activating a storage node located at ROOT.

    Parameters
    ----------
    name : str
        Name of the storage node to activate.
    path : str or None
        New root for the node; the stored root is left untouched when None.
    user : str
        Stored as the node's ``username``.
    address : str
        Stored as the node's ``address``.
    hostname : str or None
        Stored as the node's ``host``.  When None, the short (first
        dot-separated component) name of this machine is used.
    """
    # We need to write to the database.
    db.connect(read_write=True)

    try:
        node = di.StorageNode.get(name=name)
    except pw.DoesNotExist:
        print('Storage node "%s" does not exist. I quit.' % name)
        # Stop here: without a node there is nothing to activate, and falling
        # through would fail on the unbound ``node`` below.
        return

    if node.active:
        print('Node "%s" is already active.' % name)
        return

    # Set the default hostname if required
    if hostname is None:
        hostname = socket.gethostname().split(".")[0]
        print('I will set the host to "%s".' % hostname)

    # Set the parameters of this node
    node.username = user
    node.address = address
    node.active = True
    node.host = hostname

    # Only override the root when one was explicitly given
    if path is not None:
        node.root = path

    node.save()

    print('Successfully activated "%s".' % name)
def db_conn():
    """Set up chimedb.core for testing with a local dummy DB.

    Writes a temporary rc file describing a local MySQL test database, points
    chimedb at it through the test-mode environment variables, connects,
    creates the dataflag opinion tables and makes sure the test user exists.
    """
    handle, rc_path = tempfile.mkstemp(text=True)

    # The connection settings chimedb should pick up in test mode
    with os.fdopen(handle, "a") as rc_file:
        rc_file.write("""\
chimedb:
    db_type: MySQL
    db: test
    user_ro: test
    passwd_ro: test
    user_rw: test
    passwd_rw: test
    host: 127.0.0.1
    port: 32574
""")

    # Tell chimedb where the database connection config is
    assert os.path.isfile(rc_path), "Could not find {}.".format(rc_path)
    os.environ["CHIMEDB_TEST_RC"] = rc_path

    # Make sure we don't write to the actual chime database
    os.environ["CHIMEDB_TEST_ENABLE"] = "Yes, please."

    db.connect()
    db.orm.create_tables(["chimedb.dataflag.opinion"])

    # Insert the test user together with its stored password hash
    password_hash = ":B:0000ffff:e989651ffffcb5bf9b9abedfdab58460"
    db.mediawiki.MediaWikiUser.get_or_create(
        user_name=user, user_password=password_hash
    )
def deactivate(root_or_name):
    """Deactivate a storage node with location or named ROOT_OR_NAME.

    The argument is first tried as a node name.  If no node has that name it
    is treated as a path on this host (one trailing slash is dropped) and the
    node is looked up by root and short hostname.  The process exits if
    neither lookup succeeds.

    Parameters
    ----------
    root_or_name : str
        Node name, or root path of a node on the local host.
    """
    # We need to write to the database.
    db.connect(read_write=True)

    try:
        node = di.StorageNode.get(name=root_or_name)
    except pw.DoesNotExist:
        # Not a node name, so treat it as a path.  endswith() is safe on an
        # empty string, whereas indexing [-1] would raise IndexError.
        if root_or_name.endswith("/"):
            root_or_name = root_or_name[:-1]

        if not os.path.exists(root_or_name):
            print("That is neither a node name, nor a path on this host. " "I quit.")
            exit()

        try:
            node = di.StorageNode.get(
                root=root_or_name, host=socket.gethostname().split(".")[0]
            )
        except pw.DoesNotExist:
            print(
                "That is neither a node name nor a root name that is " "known. I quit."
            )
            exit()

    if not node.active:
        print("There is no node active there any more.")
    else:
        node.active = False
        node.save()
        print("Node successfully deactivated.")
def setUp(self):
    """Set up chimedb.core for testing with a local dummy DB.

    Writes a temporary rc file for a local MySQL test database, enables
    chimedb test mode through the environment, connects, creates the dataset
    tables and runs ``dget.index()``.
    """
    handle, rc_path = tempfile.mkstemp(text=True)

    # The connection settings chimedb should pick up in test mode
    with os.fdopen(handle, "a") as rc_file:
        rc_file.write("""\
chimedb:
    db_type: MySQL
    db: test
    user_ro: travis
    passwd_ro: ""
    user_rw: travis
    passwd_rw: ""
    host: 127.0.0.1
    port: 3306
""")

    # Tell chimedb where the database connection config is
    assert os.path.isfile(rc_path), "Could not find {}.".format(rc_path)
    os.environ["CHIMEDB_TEST_RC"] = rc_path

    # Make sure we don't write to the actual chime database
    os.environ["CHIMEDB_TEST_ENABLE"] = "Yes, please."

    db.connect()
    db.orm.create_tables("chimedb.dataset")

    dget.index()
def db_conn():
    """Set up chimedb.core for testing with a local dummy DB.

    Generator: everything before the ``yield`` prepares an on-disk sqlite
    test database holding two MediaWiki users; the code after the ``yield``
    removes the temporary file again.
    """
    handle, sqlite_path = tempfile.mkstemp(text=True)
    os.close(handle)

    # Tell chimedb which file to use as its sqlite test database
    assert os.path.isfile(sqlite_path), "Could not find {}.".format(sqlite_path)
    os.environ["CHIMEDB_TEST_SQLITE"] = sqlite_path

    # Make sure we don't write to the actual chime database
    os.environ["CHIMEDB_TEST_ENABLE"] = "Yes, please."

    db.connect()
    db.orm.create_tables(["chimedb.dataflag.opinion"])

    accounts = (
        # a regular user
        dict(
            user_id=user_id,
            user_name=user,
            user_password=":B:0000ffff:e989651ffffcb5bf9b9abedfdab58460",
        ),
        # a user with a password hash we don't understand
        dict(user_id=1, user_name=fail_user, user_password="1 2 3 4"),
    )
    for account in accounts:
        db.mediawiki.MediaWikiUser.get_or_create(**account)

    db.close()

    yield

    # tear down
    os.remove(sqlite_path)
def test_chimedb_test_rc(self):
    """Check that an rc file named by CHIMEDB_TEST_RC is used in test mode.

    The rc file points at an initially empty on-disk sqlite file; after
    creating a table and a row through chimedb, that file must have grown.
    """
    # Create an empty on-disk sqlite database
    (fd, dbfile) = tempfile.mkstemp(text=True)
    os.close(fd)

    # Create a rcfile
    (fd, rcfile) = tempfile.mkstemp(text=True)

    # Remove both temporary files even when an assertion below fails
    try:
        with os.fdopen(fd, "a") as rc:
            rc.write("""\
chimedb:
    db_type: sqlite
    db: {0}
""".format(dbfile))

        # Point chimedb at the rc file through the test-mode variable; the
        # size check at the end only passes if this file is honoured.
        os.environ["CHIMEDB_TEST_RC"] = rcfile

        db.test_enable()
        db.connect(read_write=True)
        db.proxy.create_tables([TestTable])
        TestTable.create(datum=datum_value)

        # Did that work?
        self.assertEqual(
            TestTable.select(TestTable.datum).scalar(), datum_value)
        db.close()

        # The on-disk sqlite database should not be empty
        stat = os.stat(dbfile)
        self.assertNotEqual(stat.st_size, 0)
    finally:
        os.unlink(rcfile)
        os.unlink(dbfile)
def test_no_chimedbrc(self):
    """Connecting must raise OSError when CHIMEDB_TEST_RC mentions "chimedbrc"."""
    # This is not allowed
    forbidden_value = 'any string containing "chimedbrc"'
    os.environ["CHIMEDB_TEST_RC"] = forbidden_value
    db.test_enable()

    self.assertRaises(OSError, db.connect)
def import_file(node, root, acq_name, file_name):
    """Run ``_import_file``, retrying for as long as the DB connection drops.

    Each ``pw.OperationalError`` is logged, followed by a five second pause
    and a read-write reconnect before the import is attempted again.
    """
    while True:
        try:
            _import_file(node, root, acq_name, file_name)
        except pw.OperationalError:
            log.error(
                "MySQL connexion dropped. Will attempt to reconnect in five seconds."
            )
            time.sleep(5)
            db.connect(read_write=True, reconnect=True)
        else:
            # The import went through; stop retrying
            break
def test_atomic_autocommit(self):
    """An update made inside ``db.atomic`` is still visible after reconnecting."""
    expected = datum_value + 1

    @db.atomic(read_write=True)
    def inside_atomic():
        TestTable.update(datum=datum_value + 1).execute()

    # Execute
    inside_atomic()

    # Check through a fresh connection
    db.close()
    db.connect()
    stored = TestTable.select(TestTable.datum).scalar()
    self.assertEqual(stored, expected)
def status(all):
    """Summarise the status of alpenhorn storage nodes.

    Prints one table row per storage node: file count, total size, the
    node's share of the whole archive (by count and by size) and its
    ``host:root`` location.

    Parameters
    ----------
    all : bool
        When false, only nodes flagged active are listed.
    """
    import tabulate

    db.connect()

    def _percent(part, whole):
        # A zero total (empty archive, or only zero/NULL sizes) would
        # otherwise raise ZeroDivisionError; report 0% instead.
        return 100.0 * part / whole if whole else 0.0

    # Data to fetch from the database (node name, total files, total size)
    query_info = (
        di.StorageNode.name,
        pw.fn.Count(di.ArchiveFileCopy.id).alias("count"),
        pw.fn.Sum(di.ArchiveFile.size_b).alias("total_size"),
        di.StorageNode.host,
        di.StorageNode.root,
    )

    # Per node totals
    nodes = (
        di.StorageNode.select(*query_info)
        .join(di.ArchiveFileCopy)
        .where(di.ArchiveFileCopy.has_file == "Y")
        .join(di.ArchiveFile)
        .group_by(di.StorageNode)
        .order_by(di.StorageNode.name)
    )

    if not all:
        nodes = nodes.where(di.StorageNode.active)

    # Totals for the whole archive
    tot = di.ArchiveFile.select(
        pw.fn.Count(di.ArchiveFile.id).alias("count"),
        pw.fn.Sum(di.ArchiveFile.size_b).alias("total_size"),
    ).scalar(as_tuple=True)

    # SUM() yields NULL (None) when there is nothing to add up
    total_count = int(tot[0] or 0)
    total_size = int(tot[1] or 0)

    data = []
    for name, count, size, host, root in nodes.tuples():
        count = int(count)
        size = int(size or 0)
        data.append(
            [
                name,
                count,
                # bytes -> units of 2**40 bytes (shown under the "Size [TB]" header)
                size / 2 ** 40.0,
                _percent(count, total_count),
                _percent(size, total_size),
                "%s:%s" % (host, root),
            ]
        )

    headers = ["Node", "Files", "Size [TB]", "Files [%]", "Size [%]", "Path"]

    print(tabulate.tabulate(data, headers=headers, floatfmt=".1f"))
def test_atomic_raise(self):
    """An exception raised inside ``db.atomic`` leaves the stored value unchanged."""

    @db.atomic(read_write=True)
    def inside_atomic():
        TestTable.update(datum=datum_value + 1).execute()
        raise RuntimeError

    # Execute
    self.assertRaises(RuntimeError, inside_atomic)

    # Check through a fresh connection
    db.close()
    db.connect()
    stored = TestTable.select(TestTable.datum).scalar()
    self.assertEqual(stored, datum_value)
def __init__(self):
    """Collect already-processed CSDs and the CSD span of every correlator file.

    Rank 0 scans the current directory for names matching
    ``existing_csd_regex`` and queries the database for the start and finish
    times of all correlator files; both results are then broadcast to every
    rank.
    """
    super(FilterExisting, self).__init__()

    self.csd_list = []
    self.corr_files = {}

    if mpiutil.rank0:
        # Look for CSDs in the current directory
        import glob

        entries = glob.glob("*")
        if self.existing_csd_regex:
            for entry in entries:
                match = re.search(self.existing_csd_regex, entry)
                if match is None:
                    continue
                self.csd_list.append(int(match.group(1)))

        # Search the database to get the start and end times of all correlation files
        from chimedb import data_index as di
        from chimedb.core import connect
        from ch_util import ephemeris

        connect()

        selection = di.ArchiveFile.select(
            di.ArchiveAcq.name,
            di.ArchiveFile.name,
            di.CorrFileInfo.start_time,
            di.CorrFileInfo.finish_time,
        )
        query = (
            selection.join(di.ArchiveAcq)
            .switch(di.ArchiveFile)
            .join(di.CorrFileInfo)
        )

        for acq, fname, start, finish in query.tuples():
            # Files without both timestamps cannot be placed on a CSD
            if start is not None and finish is not None:
                span = (ephemeris.csd(start), ephemeris.csd(finish))
                self.corr_files[os.path.join(acq, fname)] = span

        self.log.debug("Skipping existing CSDs %s", repr(self.csd_list))

    # Broadcast results to other ranks
    self.corr_files = mpiutil.world.bcast(self.corr_files, root=0)
    self.csd_list = mpiutil.world.bcast(self.csd_list, root=0)
def active(host):
    """list the nodes active on this, or another specified, machine"""
    db.connect()

    # Default to the short name of the local machine
    if host is None:
        host = socket.gethostname().split(".")[0]

    active_nodes = di.StorageNode.select().where(
        di.StorageNode.host == host, di.StorageNode.active == True
    )

    printed_any = False
    for node in active_nodes:
        copies_on_node = di.ArchiveFileCopy.select().where(
            di.ArchiveFileCopy.node == node
        )
        n_file = copies_on_node.count()
        print("%-25s %-30s %5d files" % (node.name, node.root, n_file))
        printed_any = True

    if not printed_any:
        print("No nodes are active on host %s." % host)
def __init__(
    self,
    broker_host,
    broker_port,
    redis_host,
    redis_port,
    log_level,
    failure_wait_time,
):
    """Register with the broker, then open the database and redis connections.

    ``failure_wait_time`` is given in milliseconds and stored in seconds.
    The process exits with status 1 when registering the startup fails.
    """
    logger.setLevel(log_level)

    # convert ms to s
    self.failure_wait_time = failure_wait_time / 1000

    startup_time = datetime.datetime.utcnow()

    # The configuration as reported to the broker (wait time still in ms)
    config = dict(
        broker_host=broker_host,
        broker_port=broker_port,
        redis_host=redis_host,
        redis_port=redis_port,
        log_level=log_level,
        failure_wait_time=failure_wait_time,
    )

    manager = Manager(broker_host, broker_port)
    try:
        manager.register_start(startup_time, __version__, config)
    except (CometError, ConnectionError) as exc:
        message = "Comet archiver failed registering its startup and initial config: {}"
        logger.error(message.format(exc))
        exit(1)

    # Open database connection
    chimedb.connect(read_write=True)

    # Create any missing table.
    chimedb.orm.create_tables("chimedb.dataset")

    # Open connection to redis
    self.redis = redis.Redis(
        redis_host, redis_port, encoding="utf-8", decode_responses=True
    )
def test_chimedb_test_sqlite(self):
    """Check that the file named by CHIMEDB_TEST_SQLITE is used in test mode.

    The file starts out empty; after creating a table and a row through
    chimedb it must have a non-zero size.
    """
    # Create an empty on-disk sqlite database
    (fd, dbfile) = tempfile.mkstemp(text=True)
    os.close(fd)

    def _remove_dbfile():
        # Best effort: the file may already have been removed elsewhere
        try:
            os.unlink(dbfile)
        except OSError:
            pass

    # Do not leave the temporary database behind, whether the test passes
    # or fails
    self.addCleanup(_remove_dbfile)

    os.environ["CHIMEDB_TEST_SQLITE"] = dbfile

    db.test_enable()
    db.connect(read_write=True)
    db.proxy.create_tables([TestTable])
    TestTable.create(datum=datum_value)

    # Did that work?
    self.assertEqual(
        TestTable.select(TestTable.datum).scalar(), datum_value)
    db.close()

    # The on-disk sqlite database should not be empty anymore
    stat = os.stat(dbfile)
    self.assertNotEqual(stat.st_size, 0)
def cli():
    """Alpenhorn data management service.

    Connects read-write, sets up file observers for the nodes active on this
    host, then runs the update loop until interrupted from the keyboard.
    """
    # We need write access to the DB.
    db.connect(read_write=True)

    # Get the name of this host
    host = socket.gethostname().split(".")[0]

    # Get the list of nodes currently mounted
    node_list = list(
        di.StorageNode.select().where(
            di.StorageNode.host == host, di.StorageNode.active
        )
    )

    # Warn if there are no mounted nodes. We used to exit here, but actually
    # it's useful to keep alpenhornd running for nodes where we exclusively use
    # transport disks (e.g. jingle)
    if not node_list:
        # ``warning`` rather than the ``warn`` alias, which the standard
        # logging module deprecated and later removed.
        log.warning('No nodes on this host ("%s") registered in the DB!' % host)

    # Load the cache of already imported files
    auto_import.load_import_cache()

    # Setup the observers to watch the nodes for new files
    auto_import.setup_observers(node_list)

    # Enter main loop performing node updates
    try:
        update.update_loop(host)

    # Exit cleanly on a keyboard interrupt
    except KeyboardInterrupt:
        log.info("Exiting...")
        auto_import.stop_observers()

    # Wait for watchdog threads to terminate
    auto_import.join_observers()
def verify(node_name, md5, fixdb, acq):
    """Verify the archive on NODE against the database.

    Every file copy the database lists as present on the node is checked on
    disk: it is "missing" if the path does not exist, and "corrupt" if its
    md5 sum (when ``md5`` is set) or otherwise its size disagrees with the
    database.  With ``fixdb`` the user is offered to flag missing copies as
    ``has_file="N"`` and corrupt ones as ``has_file="M"``.  A non-empty
    ``acq`` restricts the scan to the named acquisitions.
    """
    db.connect()

    try:
        this_node = di.StorageNode.get(di.StorageNode.name == node_name)
    except pw.DoesNotExist:
        print("Specified node does not exist.")
        return

    ## Use a complicated query with a tuples construct to fetch everything we
    ## need in a single query. This massively speeds up the whole process versus
    ## fetching all the FileCopy's then querying for Files and Acqs.
    columns = (
        di.ArchiveFile.name,
        di.ArchiveAcq.name,
        di.ArchiveFile.size_b,
        di.ArchiveFile.md5sum,
        di.ArchiveFileCopy.id,
    )
    lfiles = (
        di.ArchiveFile.select(*columns)
        .join(di.ArchiveAcq)
        .switch(di.ArchiveFile)
        .join(di.ArchiveFileCopy)
        .where(di.ArchiveFileCopy.node == this_node, di.ArchiveFileCopy.has_file == "Y")
        .tuples()
    )

    # (path on disk, file copy id) pairs for every problem found
    missing = []
    corrupt = []
    nfiles = 0

    with click.progressbar(lfiles, label="Scanning files") as lfiles_iter:
        for filename, acqname, filesize, md5sum, fc_id in lfiles_iter:
            # Skip if not in specified acquisitions
            if len(acq) > 0 and acqname not in acq:
                continue

            nfiles += 1
            filepath = "/".join([this_node.root, acqname, filename])

            # Check if file is plain missing
            if not os.path.exists(filepath):
                missing.append((filepath, fc_id))
                continue

            # Compare checksums when asked to, otherwise just the sizes
            if md5:
                is_corrupt = di.util.md5sum_file(filepath) != md5sum
            else:
                is_corrupt = os.path.getsize(filepath) != filesize

            if is_corrupt:
                corrupt.append((filepath, fc_id))

    missing_files = [path for path, _ in missing]
    missing_ids = [copy_id for _, copy_id in missing]
    corrupt_files = [path for path, _ in corrupt]
    corrupt_ids = [copy_id for _, copy_id in corrupt]

    for heading, paths in (
        ("=== Missing files ===", missing_files),
        ("=== Corrupt files ===", corrupt_files),
    ):
        if len(paths) > 0:
            print()
            print(heading)
            for fname in paths:
                print(fname)

    print()
    print("=== Summary ===")
    print(" %i total files" % nfiles)
    print(" %i missing files" % len(missing_files))
    print(" %i corrupt files" % len(corrupt_files))
    print()

    if not fixdb:
        return

    # Fix up the database by marking files as missing, and marking
    # corrupt files for verification by alpenhornd.

    # Make sure we connect RW
    db.connect(read_write=True)

    if (len(missing_files) > 0) and click.confirm("Fix missing files"):
        mark_missing = di.ArchiveFileCopy.update(has_file="N").where(
            di.ArchiveFileCopy.id << missing_ids
        )
        missing_count = mark_missing.execute()
        print(" %i marked as missing" % missing_count)

    if (len(corrupt_files) > 0) and click.confirm("Fix corrupt files"):
        mark_corrupt = di.ArchiveFileCopy.update(has_file="M").where(
            di.ArchiveFileCopy.id << corrupt_ids
        )
        corrupt_count = mark_corrupt.execute()
        print(" %i corrupt files marked for verification" % corrupt_count)
def _import_file(node, root, acq_name, file_name):
    """Import a file into the DB.

    This routine adds the following to the database, if they do not already
    exist (or might be corrupted).

    - The acquisition that the file is a part of.
    - Information on the acquisition, if it is of type "corr".
    - The file.
    - Information on the file, if it is of type "corr".
    - Indicates that the file exists on this node.

    Parameters
    ----------
    node
        Storage node the file copy is registered on.
    root : str
        Directory containing the acquisition directory.
    acq_name : str
        Name of the acquisition (directory) the file belongs to.
    file_name : str
        Name of the file inside the acquisition directory.

    Notes
    -----
    Returns early, without registering anything further, when the file is
    locked, the acquisition name does not parse, the file is already listed
    in ``import_done``, the acquisition cannot be added, or the file type is
    not recognised.
    """
    global import_done
    fullpath = "%s/%s/%s" % (root, acq_name, file_name)
    log.debug("Considering %s for import." % fullpath)

    # Skip the file if ch_master.py still has a lock on it.
    if os.path.isfile("%s/%s/.%s.lock" % (root, acq_name, file_name)):
        log.debug('Skipping "%s", which is locked by ch_master.py.' % fullpath)
        return

    # Parse the path
    try:
        ts, inst, atype = di.util.parse_acq_name(acq_name)
    except db.ValidationError:
        log.info("Skipping non-acquisition path %s." % acq_name)
        return

    # import_done is kept sorted, so a bisection finds known files quickly
    if import_done is not None:
        i = bisect.bisect_left(import_done, fullpath)
        if i != len(import_done) and import_done[i] == fullpath:
            log.debug("Skipping already-registered file %s." % fullpath)
            return

    # Figure out which acquisition this is; add if necessary.
    try:
        acq = di.ArchiveAcq.get(di.ArchiveAcq.name == acq_name)
        log.debug('Acquisition "%s" already in DB. Skipping.' % acq_name)
    except pw.DoesNotExist:
        acq = add_acq(acq_name)
        if acq is None:
            return
        log.info('Acquisition "%s" added to DB.' % acq_name)

    # What kind of file do we have?
    ftype = di.util.detect_file_type(file_name)
    if ftype is None:
        log.info('Skipping unrecognised file "%s/%s".' % (acq_name, file_name))
        return

    # Make sure information about the acquisition exists in the DB.
    #
    # The broad handlers below catch ``Exception`` rather than everything, so
    # that KeyboardInterrupt / SystemExit still stop the process.
    if atype == "corr" and ftype.name == "corr":
        if not acq.corrinfos.count():
            try:
                di.CorrAcqInfo.create(
                    acq=acq, **get_acqcorrinfo_keywords_from_h5(fullpath))
                log.info(
                    'Added information for correlator acquisition "%s" to '
                    "DB." % acq_name)
            except Exception:
                log.warning('Missing info for acquistion "%s": HDF5 datasets '
                            "empty. Leaving fields NULL." % (acq_name))
                di.CorrAcqInfo.create(acq=acq)
    elif atype == "hfb" and ftype.name == "hfb":
        if not acq.hfbinfos.count():
            try:
                di.HFBAcqInfo.create(
                    acq=acq, **get_acqhfbinfo_keywords_from_h5(fullpath))
                log.info('Added information for HFB acquisition "%s" to '
                         "DB." % acq_name)
            except Exception:
                log.warning('Missing info for acquistion "%s": HDF5 datasets '
                            "empty. Leaving fields NULL." % (acq_name))
                di.HFBAcqInfo.create(acq=acq)
    elif atype == "hk" and ftype.name == "hk":
        try:
            keywords = get_acqhkinfo_keywords_from_h5("%s/%s" % (root, acq_name))
        except Exception:
            log.warning("Could no open atmel_id.dat file. Skipping.")
            keywords = []
        for kw in keywords:
            # Only add boards that are not yet recorded for this acquisition
            if not sum(1 for _ in di.HKAcqInfo.select().where(
                    di.HKAcqInfo.acq == acq).where(
                        di.HKAcqInfo.atmel_name == kw["atmel_name"])):
                try:
                    di.HKAcqInfo.create(acq=acq, **kw)
                    log.info(
                        'Added information for housekeeping acquisition "%s", '
                        "board %s to DB." % (acq_name, kw["atmel_name"]))
                except Exception:
                    log.warning(
                        'Missing info for acquisition "%s": atmel_id.dat '
                        "file missing or corrupt. Skipping this acquisition."
                        % acq_name)
                    return
    elif atype == "rawadc":
        if not acq.rawadcinfos.count():
            di.RawadcAcqInfo.create(
                acq=acq, **get_acqrawadcinfo_keywords_from_h5(acq_name))
            log.info('Added information for raw ADC acquisition "%s" to '
                     "DB." % acq_name)

    # Add the file, if necessary.
    try:
        file = di.ArchiveFile.get(di.ArchiveFile.name == file_name,
                                  di.ArchiveFile.acq == acq)
        size_b = file.size_b
        log.debug('File "%s/%s" already in DB. Skipping.' %
                  (acq_name, file_name))
    except pw.DoesNotExist:
        log.debug("Computing md5sum.")
        md5sum = di.util.md5sum_file(fullpath, cmd_line=True)
        size_b = os.path.getsize(fullpath)
        # Keep retrying the insert until the connection is back
        done = False
        while not done:
            try:
                file = di.ArchiveFile.create(acq=acq,
                                             type=ftype,
                                             name=file_name,
                                             size_b=size_b,
                                             md5sum=md5sum)
                done = True
            except pw.OperationalError:
                log.error(
                    "MySQL connexion dropped. Will attempt to reconnect in "
                    "five seconds.")
                time.sleep(5)
                db.connect(read_write=True, reconnect=True)
        log.info('File "%s/%s" added to DB.' % (acq_name, file_name))

    # Register the copy of the file here on the collection server, if (1) it does
    # not exist, or (2) it does exist but has been labelled as corrupt. If (2),
    # check again.
    # Use a transaction to avoid race condition
    with db.proxy.transaction():
        if not file.copies.where(di.ArchiveFileCopy.node == node).count():
            di.ArchiveFileCopy.create(file=file,
                                      node=node,
                                      has_file="Y",
                                      wants_file="Y")
            log.info('Registered file copy "%s/%s" to DB.' %
                     (acq_name, file_name))

    # Make sure information about the file exists in the DB.
    if ftype.name == "corr":
        # Add if (1) there is no corrinfo or (2) the corrinfo is missing.
        if not file.corrinfos.count():
            try:
                di.CorrFileInfo.create(
                    file=file, **get_filecorrinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                if not file.corrinfos.count():
                    di.CorrFileInfo.create(file=file)
                log.warning('Missing info for file "%s/%s": HDF5 datasets '
                            "empty or unreadable. Leaving fields NULL." %
                            (acq_name, file_name))
        elif not file.corrinfos[0].start_time:
            try:
                i = file.corrinfos[0]
                k = get_filecorrinfo_keywords_from_h5(fullpath)
            except Exception:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.chunk_number = k["chunk_number"]
                i.freq_number = k["freq_number"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
    elif ftype.name == "hfb":
        # Add if (1) there is no hfbinfo or (2) the hfbinfo is missing.
        if not file.hfbinfos.count():
            try:
                di.HFBFileInfo.create(
                    file=file, **get_filehfbinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                if not file.hfbinfos.count():
                    di.HFBFileInfo.create(file=file)
                log.warning('Missing info for file "%s/%s": HDF5 datasets '
                            "empty or unreadable. Leaving fields NULL." %
                            (acq_name, file_name))
        elif not file.hfbinfos[0].start_time:
            try:
                i = file.hfbinfos[0]
                k = get_filehfbinfo_keywords_from_h5(fullpath)
            except Exception:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.chunk_number = k["chunk_number"]
                i.freq_number = k["freq_number"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
    elif ftype.name == "hk":
        # Add if (1) there is no hkinfo or (2) the hkinfo is missing.
        if not file.hkinfos.count():
            try:
                di.HKFileInfo.create(
                    file=file, **get_filehkinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                # Check the hk relation, matching the branch being handled
                if not file.hkinfos.count():
                    di.HKFileInfo.create(file=file)
                log.warning('Missing info for file "%s/%s": HDF5 datasets '
                            "empty or unreadable. Leaving fields NULL." %
                            (acq_name, file_name))
        elif not file.hkinfos[0].start_time:
            try:
                i = file.hkinfos[0]
                k = get_filehkinfo_keywords_from_h5(fullpath)
            except Exception:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.atmel_name = k["atmel_name"]
                i.chunk_number = k["chunk_number"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
    elif ftype.name == "weather":
        # Add if (1) there is no weatherinfo or (2) the weatherinfo is missing.
        if not file.weatherinfos.count():
            # NOTE: unlike the other file types there is no NULL-row fallback
            # here; a failure to read the file propagates to the caller.
            di.WeatherFileInfo.create(
                file=file, **get_fileweatherinfo_keywords_from_h5(fullpath))
            log.info('Added information for file "%s/%s" to DB.' %
                     (acq_name, file_name))
        elif not file.weatherinfos[0].start_time:
            try:
                i = file.weatherinfos[0]
                k = get_fileweatherinfo_keywords_from_h5(fullpath)
            except Exception:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.date = k["date"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
    elif ftype.name == "rawadc":
        # Add if there is no rawadcinfo
        if not file.rawadcinfos.count():
            try:
                di.RawadcFileInfo.create(
                    file=file, **get_filerawadcinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                if not file.rawadcinfos.count():
                    di.RawadcFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif ftype.name == "hkp":
        # Add if there is no hkpinfo
        if not file.hkpinfos.count():
            try:
                di.HKPFileInfo.create(
                    file=file, **get_filehkpinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                if not file.hkpinfos.count():
                    di.HKPFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif atype == "digitalgain" and ftype.name == "calibration":
        if not file.digitalgaininfos.count():
            try:
                di.DigitalGainFileInfo.create(
                    file=file,
                    **get_filedigitalgaininfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                if not file.digitalgaininfos.count():
                    di.DigitalGainFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif atype == "gain" and ftype.name == "calibration":
        if not file.calibrationgaininfos.count():
            try:
                di.CalibrationGainFileInfo.create(
                    file=file,
                    **get_filecalibrationgaininfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                if not file.calibrationgaininfos.count():
                    di.CalibrationGainFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif atype == "flaginput" and ftype.name == "calibration":
        if not file.flaginputinfos.count():
            try:
                di.FlagInputFileInfo.create(
                    file=file,
                    **get_fileflaginputinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except Exception:
                if not file.flaginputinfos.count():
                    di.FlagInputFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif atype == "misc" and ftype.name == "miscellaneous":
        with db.proxy.atomic():
            if not file.miscfileinfos.count():
                di.MiscFileInfo.create(file=file,
                                       **get_miscfile_data(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))

    # Remember the file, keeping the list sorted, and rewrite the local
    # record of imported paths (one per line).
    if import_done is not None:
        bisect.insort_left(import_done, fullpath)
        with open(LOCAL_IMPORT_RECORD, "w") as fp:
            fp.write("\n".join(import_done))
def _available_files(self, start_csd, end_csd):
    """
    Find chimestack file copies overlapping the span start_csd to end_csd.

    The span is converted to unix time and file copies with
    ``has_file == "Y"`` whose correlator info overlaps it are selected, once
    restricted to the active ``cedar_online`` node and once across all nodes.

    Parameters
    ----------
    start_csd : int
        Start date in sidereal day format
    end_csd : int
        End date in sidereal day format

    Returns
    -------
    filenames_online : list
        Sorted ``(start_time, finish_time)`` tuples, one per matching copy on
        ``cedar_online``.
    filenames_that_exist : list
        Sorted, de-duplicated ``(start_time, finish_time)`` tuples for
        matching copies on any node.
    """
    # Connect to databases
    db.connect()

    # Get timestamps in unix format
    # Needed for queries
    start_time, end_time = (
        ephemeris.csd_to_unix(start_csd),
        ephemeris.csd_to_unix(end_csd),
    )

    # We will want to know which files are in chime_online and nearline on cedar
    online_node = di.StorageNode.get(name="cedar_online", active=True)
    chimestack_inst = di.ArchiveInst.get(name="chimestack")

    # TODO if the time range is so small that it's completely contained within a single file, nothing will be returned
    # have to special-case it by looking for files which start before the start time and end after the end time).

    time_columns = (di.CorrFileInfo.start_time, di.CorrFileInfo.finish_time)
    archive_files = (
        di.ArchiveFileCopy.select(*time_columns)
        .join(di.ArchiveFile)
        .join(di.ArchiveAcq)
        .switch(di.ArchiveFile)
        .join(di.CorrFileInfo)
    )

    # chimestack copies that exist somewhere and overlap the requested span
    overlap_conditions = (
        # specifically looking for chimestack files
        di.ArchiveAcq.inst == chimestack_inst,
        # which contain data that includes start time and end time
        di.CorrFileInfo.start_time < end_time,
        di.CorrFileInfo.finish_time >= start_time,
        di.ArchiveFileCopy.has_file == "Y",
    )
    files_that_exist = archive_files.where(*overlap_conditions)

    # ... narrowed down to those that are online
    files_online = files_that_exist.where(di.ArchiveFileCopy.node == online_node)

    filenames_online = sorted(files_online.tuples())

    # files_that_exist might contain the same file multiple times
    # if it exists in multiple locations (nearline, online, gossec, etc)
    # we only want to include it once
    filenames_that_exist = sorted(set(files_that_exist.tuples()))

    return filenames_online, filenames_that_exist
def _create_hook(self):
    """Create the revision.

    Works out which days are usable (flagged "good" and not flagged "bad")
    in each daily revision, then splits the usable days of every quarter
    into ``partitions`` interleaved stacks.  The results are stored in
    ``default_params["days"]`` and ``default_params["stacks"]``.
    """
    params = self.default_params

    core.connect()

    def _flagged_lsds(revision, decision):
        # Distinct LSDs carrying an opinion with this decision for the revision
        query = (
            df.DataFlagOpinion.select(df.DataFlagOpinion.lsd)
            .distinct()
            .where(
                df.DataFlagOpinion.revision == revision,
                df.DataFlagOpinion.decision == decision,
            )
        )
        return {row[0] for row in query.tuples()}

    # Go over each revision and construct the set of LSDs we should stack, and save
    # the path to each.
    # NOTE: later entries in `daily_revisions` will override LSDs found in earlier
    # revisions.
    days = {}
    for rev in params["daily_revisions"]:
        daily_root = params["daily_root"]
        daily_path = daily_root if daily_root is not None else self.root_path
        daily_rev = daily.DailyProcessing(rev, root_path=daily_path)

        revision = df.DataRevision.get(name=rev)
        bad_days = _flagged_lsds(revision, "bad")
        good_days = _flagged_lsds(revision, "good")

        for d in daily_rev.ls():
            lsd = int(d)

            # Keep only days that are flagged good and not flagged bad; this
            # replaces entries from prior revisions
            if lsd in good_days and lsd not in bad_days:
                days[lsd] = daily_rev.base_path / d

    lsds = sorted(days)
    lsd_array = np.array(lsds)

    # Map each LSD into the quarter it belongs in and find which quarters we have
    # data for
    dates = ctime.unix_to_datetime(ephemeris.csd_to_unix(np.array(lsds)))
    yq = np.array([f"{d.year}q{(d.month - 1) // 3 + 1}" for d in dates])
    quarters = np.unique(yq)

    npart = params["partitions"]
    min_lsds = params["min_days"] * npart

    # For each quarter divide the LSDs it contains into a number of partitions to
    # give jack knifes
    lsd_partitions = {}
    for quarter in quarters:
        lsds_in_quarter = sorted(lsd_array[yq == quarter])

        # Skip quarters with too few days in them
        if len(lsds_in_quarter) < min_lsds:
            continue

        lsd_partitions.update(
            {
                f"{quarter}p{i}": [int(d) for d in lsds_in_quarter[i::npart]]
                for i in range(npart)
            }
        )

    # Save the relevant parameters into the revisions configuration
    params["days"] = {int(day): str(path) for day, path in days.items()}
    params["stacks"] = lsd_partitions
def format_transport(serial_num):
    """Interactive routine for formatting a transport disc as a storage
    node; formats and labels the disc as necessary, the adds to the
    database. The disk is specified using the manufacturers SERIAL_NUM, which
    is printed on the disk.

    Parameters
    ----------
    serial_num : str
        Serial number used to locate the device under ``/dev/disk/by-id``
        and to build the ``CH-<serialnum>`` label and node name.
    """
    if os.getuid() != 0:
        print("You must be root to run mount on a transport disc. I quit.")
        return

    # Make sure we connect RW
    db.connect(read_write=True)

    # NOTE(review): serial_num is interpolated into shell command lines below
    # (os.popen / os.system).  It comes from the root user invoking this
    # command, but confirm it can never carry untrusted input.

    # Find the disc.
    dev = glob.glob("/dev/disk/by-id/*%s" % serial_num)
    if len(dev) == 0:
        print("No disc with that serial number is attached.")
        return
    elif len(dev) > 1:
        print("Confused: found more than one device matching that serial number:")
        for d in dev:
            # Print each candidate, not the whole list every time
            print(" %s" % d)
        print("Aborting.")
        return
    dev = dev[0]
    dev_part = "%s-part1" % dev

    # Figure out if it is formatted.
    print("Checking to see if disc is formatted. Please wait.")
    formatted = False
    part_start = False
    # The context manager closes the pipe even if reading fails
    with os.popen("parted -s %s print" % dev) as fp:
        for line in fp:
            if (
                line.find("Number") == 0
                and line.find("Start") > 0
                and line.find("File system") > 0
            ):
                # Header of the partition table; anything non-blank after it
                # is a partition entry
                part_start = True
            elif line.strip() != "" and part_start:
                formatted = True

    if not formatted:
        if not click.confirm("Disc is not formatted. Should I format it?"):
            return
        print("Creating partition. Please wait.")
        os.system(
            "parted -s -a optimal %s mklabel gpt -- mkpart primary 0%% 100%%" % dev
        )
        print("Formatting disc. Please wait.")
        time.sleep(5)  # Sleep for a few seconds to allow the partition to appear
        os.system("mkfs.ext4 %s -m 0 -L CH-%s" % (dev_part, serial_num))
    else:
        print("Disc is already formatted.")

    e2label = get_e2label(dev_part)
    name = "CH-%s" % serial_num
    if e2label and e2label != name:
        print(
            "Disc label %s does not conform to labelling standard, "
            "which is CH-<serialnum>." % e2label
        )
        # Actually stop: a bare ``exit`` without parentheses does nothing
        exit()
    elif not e2label:
        print('Labelling the disc as "%s" (using e2label) ...' % (name))
        assert dev_part is not None
        assert len(name) <= MAX_E2LABEL_LEN
        stat = os.system("/sbin/e2label %s %s" % (dev_part, name))
        if stat:
            print("Failed to e2label! Stat = %s. I quit." % (stat))
            exit()

    # Ensure the mount path exists.
    root = "/mnt/%s" % name
    if not os.path.isdir(root):
        print("Creating mount point %s." % root)
        os.mkdir(root)

    # Check to see if something else is mounted on the mount point.
    dev_part_abs = os.path.realpath(dev_part)
    with os.popen("df") as fp:
        for line in fp:
            if line.find(root) > 0:
                # Our own partition may be listed by its by-id name or by
                # its resolved device path
                if not line.startswith((dev_part, dev_part_abs)):
                    print(
                        "%s is a mount point, but %s is already active there."
                        % (root, line.split()[0])
                    )

    try:
        node = di.StorageNode.get(name=name)
    except pw.DoesNotExist:
        print(
            "This disc has not been registered yet as a storage node. "
            "Registering now."
        )
        try:
            group = di.StorageGroup.get(name="transport")
        except pw.DoesNotExist:
            print('Hmmm. Storage group "transport" does not exist. I quit.')
            exit()

        # We need to write to the database.
        db.connect(read_write=True)
        node = di.StorageNode.create(
            name=name, root=root, group=group, storage_type="T", min_avail_gb=1
        )

        print("Successfully created storage node.")

    print("Node created but not active. Run alpenhorn mount_transport for that.")
def test_connect_rw(self):
    """A read-write connection can update the stored datum and read it back."""
    db.connect(read_write=True)

    doubled = datum_value * 2
    TestTable.update(datum=doubled).execute()

    stored = TestTable.select(TestTable.datum).scalar()
    self.assertEqual(stored, datum_value * 2)
def test_connect(self):
    """Connect with default arguments and check the stored datum is readable."""
    db.connect()

    stored = TestTable.select(TestTable.datum).scalar()
    self.assertEqual(stored, datum_value)
def import_files(node_name, verbose, acq, dry):
    """Scan the current directory for known acquisition files and add them into the database for NODE.

    This command is useful for manually maintaining an archive where we can
    run alpenhornd in the usual manner.
    """
    db.connect(read_write=True)

    # Without an explicit selection, every entry of the working directory
    # is a candidate acquisition.
    acqs = glob.glob("*") if acq is None else acq

    # Bookkeeping for the final summary
    added_files = []  # copies registered by this run
    corrupt_files = []  # known files whose on-disk size is wrong
    registered_files = []  # files that already have a copy on the node
    unknown_files = []  # files the database knows nothing about
    not_acqs = []  # directories that are not known acquisitions

    # Look up the node we are importing onto
    try:
        node = di.StorageNode.select().where(di.StorageNode.name == node_name).get()
    except pw.DoesNotExist:
        print("Unknown node.")
        return

    with click.progressbar(acqs, label="Scanning acquisitions") as acq_iter:
        for acq_name in acq_iter:
            # The directory name must both parse as an acquisition name and
            # be present in the database.
            try:
                di.util.parse_acq_name(acq_name)
            except db.ValidationError:
                not_acqs.append(acq_name)
                continue

            try:
                acq_rec = (
                    di.ArchiveAcq.select().where(di.ArchiveAcq.name == acq_name).get()
                )
            except pw.DoesNotExist:
                not_acqs.append(acq_name)
                continue

            on_disk = glob.glob(acq_name + "/*")

            # Names of all files of this acquisition, and of those which
            # already have a copy on this node
            known_names = {f.name for f in acq_rec.files}
            copies_here = acq_rec.files.join(di.ArchiveFileCopy).where(
                di.ArchiveFileCopy.node == node
            )
            local_names = {f.name for f in copies_here}

            for path in on_disk:
                base = os.path.split(path)[1]

                if base not in known_names:
                    unknown_files.append(path)
                    continue

                if base in local_names:
                    registered_files.append(path)
                    continue

                archive_file = (
                    di.ArchiveFile.select()
                    .where(
                        di.ArchiveFile.name == base, di.ArchiveFile.acq == acq_rec
                    )
                    .get()
                )

                # A size mismatch marks the on-disk file as corrupt
                if os.path.getsize(path) != archive_file.size_b:
                    corrupt_files.append(path)
                    continue

                added_files.append(path)
                if not dry:
                    di.ArchiveFileCopy.create(
                        file=archive_file, node=node, has_file="Y", wants_file="Y"
                    )

    print("\n==== Summary ====")
    print()
    print("Added %i files" % len(added_files))
    print()
    print("%i corrupt files." % len(corrupt_files))
    print("%i files already registered." % len(registered_files))
    print("%i files not known" % len(unknown_files))
    print("%i directories were not acquisitions." % len(not_acqs))

    if verbose > 0:
        print()
        print("Added files:")
        print()
        for path in added_files:
            print(path)

    if verbose > 1:
        print("Corrupt:")
        for path in corrupt_files:
            print(path)
        print()

        print("Unknown files:")
        for path in unknown_files:
            print(path)
        print()

        print("Unknown acquisitions:")
        for path in not_acqs:
            print(path)
        print()
def update_node_requests(node):
    """Process file copy requests onto this node.

    Pending (not completed, not cancelled) copy requests whose destination
    group is this node's group are handled one by one until the per-node
    time budget ``max_time_per_node_operation`` is used up. Remote sources
    are pulled with bbcp or, failing that, rsync over ssh; local sources are
    hard linked or, failing that, copied with rsync. On success the file
    copy record is created/updated and the matching requests are marked
    completed. On failure the source copy is marked suspect
    (``has_file="M"``) unless the error is known to be on the destination
    side.

    Nothing is done for HPSS nodes, for nodes that are nearly full or over
    their maximum size, or for transport nodes once a transport node has
    been served this cycle.
    """
    global done_transport_this_cycle

    # Ensure we are not on an HPSS node
    if is_hpss_node(node):
        log.error("Cannot process HPSS node here.")
        return

    # Skip if node is too full
    if node.avail_gb < (node.min_avail_gb + 10):
        log.info("Node %s is nearly full. Skip transfers." % node.name)
        return

    # Calculate the total archive size from the database
    size_query = (
        di.ArchiveFile.select(fn.Sum(di.ArchiveFile.size_b))
        .join(di.ArchiveFileCopy)
        .where(di.ArchiveFileCopy.node == node, di.ArchiveFileCopy.has_file == "Y")
    )
    size = size_query.scalar(as_tuple=True)[0]
    current_size_gb = float(0.0 if size is None else size) / 2**30.0

    # Stop if the current archive size is bigger than the maximum (if set, i.e. > 0)
    if current_size_gb > node.max_total_gb and node.max_total_gb > 0.0:
        log.info(
            "Node %s has reached maximum size (current: %.1f GB, limit: %.1f GB)"
            % (node.name, current_size_gb, node.max_total_gb)
        )
        return

    # ... OR if this is a transport node quit if the transport cycle is done.
    if node.storage_type == "T" and done_transport_this_cycle:
        log.info("Ignoring transport node %s" % node.name)
        return

    start_time = time.time()

    # Fetch requests to process from the database
    requests = di.ArchiveFileCopyRequest.select().where(
        ~di.ArchiveFileCopyRequest.completed,
        ~di.ArchiveFileCopyRequest.cancelled,
        di.ArchiveFileCopyRequest.group_to == node.group,
    )

    # Add in constraint that node_from cannot be an HPSS node
    requests = requests.join(di.StorageNode).where(di.StorageNode.address != "HPSS")

    for req in requests:
        if time.time() - start_time > max_time_per_node_operation:
            break  # Don't hog all the time.

        # By default, if a copy fails, we mark the source file as suspect
        # so it gets re-MD5'd on the source node.
        check_source_on_err = True

        # Only continue if the node is actually active
        if not req.node_from.active:
            continue

        # For transport disks we should only copy onto the transport
        # node if the from_node is local, this should prevent pointlessly
        # rsyncing across the network
        if node.storage_type == "T" and node.host != req.node_from.host:
            log.debug(
                "Skipping request for %s/%s from remote node [%s] onto local "
                "transport disks"
                % (req.file.acq.name, req.file.name, req.node_from.name)
            )
            continue

        # Only proceed if the destination file does not already exist.
        try:
            di.ArchiveFileCopy.get(
                di.ArchiveFileCopy.file == req.file,
                di.ArchiveFileCopy.node == node,
                di.ArchiveFileCopy.has_file == "Y",
            )
            log.info(
                "Skipping request for %s/%s since it already exists on "
                'this node ("%s"), and updating DB to reflect this.'
                % (req.file.acq.name, req.file.name, node.name)
            )
            di.ArchiveFileCopyRequest.update(completed=True).where(
                di.ArchiveFileCopyRequest.file == req.file
            ).where(di.ArchiveFileCopyRequest.group_to == node.group).execute()
            continue
        except pw.DoesNotExist:
            pass

        # Only proceed if the source file actually exists (and is not corrupted).
        try:
            di.ArchiveFileCopy.get(
                di.ArchiveFileCopy.file == req.file,
                di.ArchiveFileCopy.node == req.node_from,
                di.ArchiveFileCopy.has_file == "Y",
            )
        except pw.DoesNotExist:
            log.error(
                "Skipping request for %s/%s since it is not available on "
                'node "%s". [file_id=%i]'
                % (req.file.acq.name, req.file.name, req.node_from.name, req.file.id)
            )
            continue

        # Check that there is enough space available.
        if node.avail_gb * 2**30.0 < 2.0 * req.file.size_b:
            log.warning(
                'Node "%s" is full: not adding datafile "%s/%s".'
                % (node.name, req.file.acq.name, req.file.name)
            )
            continue

        # Construct the origin and destination paths.
        from_path = "%s/%s/%s" % (req.node_from.root, req.file.acq.name, req.file.name)
        if req.node_from.host != node.host:
            from_path = "%s@%s:%s" % (
                req.node_from.username,
                req.node_from.address,
                from_path,
            )

        to_path = "%s/%s/" % (node.root, req.file.acq.name)
        if not os.path.isdir(to_path):
            log.info('Creating directory "%s".' % to_path)
            os.mkdir(to_path)

        # Giddy up!
        log.info('Transferring file "%s/%s".' % (req.file.acq.name, req.file.name))
        st = time.time()

        # For the potential error message later
        stderr = None

        # Attempt to transfer the file. Each of the methods below needs to set a
        # return code `ret` and give an `md5sum` of the transferred file.

        # First we need to check if we are copying over the network
        if req.node_from.host != node.host:
            # First try bbcp which is a fast multistream transfer tool. bbcp can
            # calculate the md5 hash as it goes, so we'll do that to save doing
            # it at the end.
            if command_available("bbcp"):
                ret, stdout, stderr = run_command(
                    [
                        "bbcp",
                        "-V",
                        "-f",
                        "-z",
                        "--port",
                        "4200",
                        "-W",
                        "4M",
                        "-s",
                        "16",
                        "-e",
                        "-E",
                        "%md5=",
                        from_path,
                        to_path,
                    ]
                )

                # Attempt to parse STDERR for the md5 hash
                if ret == 0:
                    mo = re.search("md5 ([a-f0-9]{32})", stderr)
                    if mo is None:
                        log.error(
                            "BBCP transfer has gone awry. STDOUT: %s\n STDERR: %s"
                            % (stdout, stderr)
                        )
                        # Treat a missing hash as a failed transfer; never
                        # dereference the missing match object.
                        ret = -1
                        md5sum = None
                    else:
                        md5sum = mo.group(1)
                else:
                    md5sum = None

            # Next try rsync over ssh.
            elif command_available("rsync"):
                ret, stdout, stderr = run_command(
                    ["rsync", "--compress"]
                    + RSYNC_OPTS
                    + [
                        "--rsync-path=ionice -c2 -n4 rsync",
                        "--rsh=ssh -q",
                        from_path,
                        to_path,
                    ]
                )

                # rsync v3+ already does a whole-file MD5 sum while
                # transferring and guarantees the written file has the same
                # MD5 sum as the source file, so we can skip the check here.
                md5sum = req.file.md5sum if ret == 0 else None

                # If the rsync error occurred during `mkstemp` this is a
                # problem on the destination, not the source
                if ret and "mkstemp" in stderr:
                    log.warning(
                        'rsync file creation failed on "{0}"'.format(node.name)
                    )
                    check_source_on_err = False
                elif "write failed on" in stderr:
                    log.warning(
                        'rsync failed to write to "{0}": {1}'.format(
                            node.name, stderr[stderr.rfind(":") + 2 :].strip()
                        )
                    )
                    check_source_on_err = False

            # If we get here then we have no idea how to transfer the file...
            else:
                log.warning("No commands available to complete this transfer.")
                check_source_on_err = False
                ret = -1

        # Okay, great we're just doing a local transfer.
        else:
            # First try to just hard link the file. This will only work if we
            # are on the same filesystem. As there's no actual copying it's
            # probably unnecessary to calculate the md5 check sum, so we'll just
            # fake it.
            try:
                link_path = "%s/%s/%s" % (
                    node.root,
                    req.file.acq.name,
                    req.file.name,
                )

                # Check explicitly if link already exists as this and
                # being unable to link will both raise OSError and get
                # confused.
                if os.path.exists(link_path):
                    log.error(
                        "File %s already exists. Clean up manually." % link_path
                    )
                    check_source_on_err = False
                    ret = -1
                else:
                    os.link(from_path, link_path)
                    ret = 0
                    # As we're linking the md5sum can't change. Skip the
                    # check here...
                    md5sum = req.file.md5sum

            # If we couldn't just link the file, try copying it with rsync.
            except OSError:
                if command_available("rsync"):
                    ret, stdout, stderr = run_command(
                        ["rsync"] + RSYNC_OPTS + [from_path, to_path]
                    )

                    # rsync v3+ already does a whole-file MD5 sum while
                    # transferring and guarantees the written file has the same
                    # MD5 sum as the source file, so we can skip the check here.
                    md5sum = req.file.md5sum if ret == 0 else None

                    # If the rsync error occurred during `mkstemp` this is a
                    # problem on the destination, not the source
                    if ret and "mkstemp" in stderr:
                        log.warning(
                            'rsync file creation failed on "{0}"'.format(node.name)
                        )
                        check_source_on_err = False
                    elif "write failed on" in stderr:
                        log.warning(
                            'rsync failed to write to "{0}": {1}'.format(
                                node.name, stderr[stderr.rfind(":") + 2 :].strip()
                            )
                        )
                        check_source_on_err = False
                else:
                    log.warning("No commands available to complete this transfer.")
                    check_source_on_err = False
                    ret = -1

        # Check the return code...
        if ret:
            if check_source_on_err:
                # If the copy didn't work, then the remote file may be corrupted.
                log.error(
                    "Copy failed: {0}. Marking source file suspect.".format(
                        stderr if stderr is not None else "Unspecified error."
                    )
                )
                di.ArchiveFileCopy.update(has_file="M").where(
                    di.ArchiveFileCopy.file == req.file,
                    di.ArchiveFileCopy.node == req.node_from,
                ).execute()
            else:
                # An error occurred that can't be due to the source
                # being corrupt
                log.error("Copy failed.")
            continue
        et = time.time()

        # Check integrity.
        if md5sum == req.file.md5sum:
            size_mb = req.file.size_b / 2**20.0
            trans_time = et - st
            rate = size_mb / trans_time
            log.info(
                "Pull complete (md5sum correct). Transferred %.1f MB in %i "
                "seconds [%.1f MB/s]" % (size_mb, int(trans_time), rate)
            )

            # Update the FileCopy (if exists), or insert a new FileCopy
            # Use transaction to avoid race condition
            with db.proxy.transaction():
                try:
                    done = False
                    while not done:
                        try:
                            fcopy = (
                                di.ArchiveFileCopy.select()
                                .where(
                                    di.ArchiveFileCopy.file == req.file,
                                    di.ArchiveFileCopy.node == node,
                                )
                                .get()
                            )
                            fcopy.has_file = "Y"
                            fcopy.wants_file = "Y"
                            fcopy.save()
                            done = True
                        except pw.OperationalError:
                            log.error(
                                "MySQL connexion dropped. Will attempt to reconnect in "
                                "five seconds."
                            )
                            time.sleep(5)
                            db.connect(True)
                except pw.DoesNotExist:
                    di.ArchiveFileCopy.insert(
                        file=req.file, node=node, has_file="Y", wants_file="Y"
                    ).execute()

            # Mark any FileCopyRequest for this file as completed
            di.ArchiveFileCopyRequest.update(completed=True).where(
                di.ArchiveFileCopyRequest.file == req.file
            ).where(di.ArchiveFileCopyRequest.group_to == node.group).execute()

            if node.storage_type == "T":
                # This node is getting the transport king.
                done_transport_this_cycle = True

            # Update node available space
            update_node_free_space(node)

        else:
            log.error(
                'Error with md5sum check: %s on node "%s", but %s on '
                'this node, "%s".'
                % (req.file.md5sum, req.node_from.name, md5sum, node.name)
            )
            log.error('Removing file "%s/%s".' % (to_path, req.file.name))
            try:
                os.remove("%s/%s" % (to_path, req.file.name))
            except OSError:
                # Best effort: the file may never have been written.
                log.error("Could not remove file.")

            # Since the md5sum failed, the remote file may be corrupted.
            log.error("Marking source file suspect.")
            di.ArchiveFileCopy.update(has_file="M").where(
                di.ArchiveFileCopy.file == req.file,
                di.ArchiveFileCopy.node == req.node_from,
            ).execute()
def test_connect_ro(self):
    """Connect with default arguments and expect an update to be rejected."""
    db.connect()

    doubled = datum_value * 2
    with self.assertRaises(pw.OperationalError):
        TestTable.update(datum=doubled).execute()
def clean(node_name, days, size, force, now, target, acq):
    """Clean up NODE by marking files as potentially removable.

    If --target is specified we will only remove files already available in the
    TARGET_GROUP. This is useful for cleaning out intermediate locations such as
    transport disks.

    Using the --days flag will only clean correlator and housekeeping
    files which have a timestamp associated with them. It will not
    touch other types. If no --days flag is given, all files will be
    considered for removal.

    The size specified with --size is always rounded up depending on the size
    of the files marked for removal. Files in this mode are ordered by
    registration time (i.e. database order), not by acquisition time, and so
    it will mark all files, not just those with a timestamp. If more than
    <size> GiB of files are already marked for removal, no new files will be
    marked.

    The --size and --days flags are mutually exclusive.
    """
    db.connect(read_write=True)

    # Check for clashing arguments
    if days is not None and size is not None:
        raise ValueError("Parameter error: you cannot specify both --days and --size")

    # Ignore weird values
    if size is not None and size <= 0:
        print("Nothing selected for cleaning.")
        return

    try:
        this_node = di.StorageNode.get(di.StorageNode.name == node_name)
    except pw.DoesNotExist:
        print("Specified node does not exist.")
        return

    # Check to see if we are on an archive node
    if this_node.storage_type == "A":
        if force or click.confirm("DANGER: run clean on archive node?"):
            print("%s is an archive node. Forcing clean." % node_name)
        else:
            print("Cannot clean archive node %s without forcing." % node_name)
            return

    # Select FileCopys on this node.
    files = (
        di.ArchiveFileCopy.select(
            di.ArchiveFileCopy.id, di.ArchiveFileCopy.wants_file, di.ArchiveFile.size_b
        )
        .join(di.ArchiveFile)
        .where(di.ArchiveFileCopy.node == this_node)
        .order_by(di.ArchiveFile.id)
    )

    # If size is specified, we select files that are currently on the node,
    # and ignore wants_file. Otherwise, we select all files destined for
    # this node (wants_file == 'Y'), whether or not they're already on it
    if size is None:
        files = files.where(di.ArchiveFileCopy.wants_file == "Y")
    else:
        files = files.where(di.ArchiveFileCopy.has_file == "Y")

    # Limit to acquisition
    if acq is not None:
        try:
            acq = di.ArchiveAcq.get(name=acq)
        except pw.DoesNotExist:
            # `acq` is still the requested name here because the lookup failed.
            raise db.NotFoundError("Specified acquisition %s does not exist" % acq)

        files_in_acq = di.ArchiveFile.select().where(di.ArchiveFile.acq == acq)

        files = files.where(di.ArchiveFileCopy.file << files_in_acq)

    # If the target option has been specified, only clean files also available there...
    if target is not None:
        # Fetch a reference to the target group
        try:
            target_group = di.StorageGroup.get(name=target)
        except pw.DoesNotExist:
            raise db.NotFoundError(
                'Target group "%s" does not exist in the DB.' % target
            )

        # First get the nodes at the destination...
        nodes_at_target = di.StorageNode.select().where(
            di.StorageNode.group == target_group
        )

        # Then use this to get a list of all files at the destination...
        files_at_target = (
            di.ArchiveFile.select()
            .join(di.ArchiveFileCopy)
            .where(
                di.ArchiveFileCopy.node << nodes_at_target,
                di.ArchiveFileCopy.has_file == "Y",
            )
        )

        # Only match files that are also available at the target
        files = files.where(di.ArchiveFileCopy.file << files_at_target)

    # If --days has been set we need to restrict to files older than the given
    # time. This only works for a few particular file types
    if days is not None and days > 0:
        # Get the time for the oldest files to keep
        oldest_unix = time.time() - 24 * 3600 * days

        # List of filetypes we want to update, needs a human readable name and a
        # FileInfo table.
        filetypes = [["correlation", di.CorrFileInfo], ["housekeeping", di.HKFileInfo]]

        file_ids = []

        # Iterate over file types for cleaning
        for name, infotable in filetypes:
            # Filter to fetch only ones with a start time older than `oldest`
            oldfiles = files.join(infotable).where(infotable.start_time < oldest_unix)

            local_file_ids = list(oldfiles)

            # Get number of files of this type
            count = oldfiles.count()

            if count > 0:
                size_bytes = (
                    di.ArchiveFileCopy.select(pw.fn.Sum(di.ArchiveFile.size_b))
                    .join(di.ArchiveFile)
                    .where(di.ArchiveFileCopy.id << local_file_ids)
                    .scalar()
                )

                size_gb = int(size_bytes) / 2 ** 30.0

                print(
                    "Cleaning up %i %s files (%.1f GB) from %s "
                    % (count, name, size_gb, node_name)
                )

                file_ids += local_file_ids

    # If size is set, iterate through files until we've satisfied the size given
    elif size is not None:
        # Convert to bytes
        size *= 2 ** 30

        # Iterate through the file list until we've found enough files
        marked_size = 0
        count = 0
        file_ids = list()
        for copy in files:
            # Add the file to the list to be marked only if necessary.
            # We can escalate wants_file = 'M' to 'N' here
            if copy.wants_file == "Y" or (now and copy.wants_file == "M"):
                file_ids.append(copy)
                marked_size += copy.file.size_b
                count += 1

            # Check if we're done. The size subtraction happens even if
            # the file wasn't added to the list
            size -= copy.file.size_b
            if size <= 0:
                break

        if count > 0:
            print(
                "Cleaning up %i files (%.1f GB) from %s "
                % (count, marked_size / 2 ** 30, node_name)
            )
        else:
            print(
                "Size parameter already satisfied. No new files marked for cleaning."
            )
            return

    # If neither days nor size is set, then just select all files that
    # meet the requirements so far
    else:
        file_ids = list(files)
        count = files.count()

        if count > 0:
            size_bytes = (
                di.ArchiveFileCopy.select(pw.fn.Sum(di.ArchiveFile.size_b))
                .join(di.ArchiveFile)
                .where(di.ArchiveFileCopy.id << file_ids)
                .scalar()
            )

            size_gb = int(size_bytes) / 1073741824.0

            print(
                "Cleaning up %i files (%.1f GB) from %s " % (count, size_gb, node_name)
            )

    # If there are any files to clean, ask for confirmation and then mark them in
    # the database for removal
    if len(file_ids) > 0:
        if force or click.confirm(" Are you sure?"):
            print(" Marking files for cleaning.")

            # 'N' requests removal outright; 'M' only marks the copy as removable
            state = "N" if now else "M"

            update = di.ArchiveFileCopy.update(wants_file=state).where(
                di.ArchiveFileCopy.id << file_ids
            )

            n = update.execute()

            print("Marked %i files for cleaning" % n)

        else:
            print(" Cancelled")
    else:
        print("No files selected for cleaning on %s." % node_name)
import peewee as pw import click import chimedb.core as db import chimedb.data_index.orm as di from . import logger # Import logger here to avoid connection # messages for transfer # Get a reference to the log log = logger.get_log() # Connect to the database read/write db.connect(read_write=True) @click.group() def cli(): """Call back commands for updating the database from a shell script after an HPSS transfer.""" pass @cli.command() @click.argument("file_id", type=int) @click.argument("node_id", type=int) def push_failed(file_id, node_id): """Update the database to reflect that the HPSS transfer failed.
def sync(
    node_name, group_name, acq, force, nice, target, transport, show_acq, show_files
):
    """Copy all files from NODE to GROUP that are not already present.

    We can also use the --target option to only transfer files that are not
    available on both the destination group, and the TARGET_GROUP. This is
    useful for transferring data to a staging location before going to a final
    archive (e.g. HPSS, transport disks).
    """
    # Make sure we connect RW
    db.connect(read_write=True)

    try:
        from_node = di.StorageNode.get(name=node_name)
    except pw.DoesNotExist:
        raise db.NotFoundError('Node "%s" does not exist in the DB.' % node_name)

    try:
        to_group = di.StorageGroup.get(name=group_name)
    except pw.DoesNotExist:
        raise db.NotFoundError('Group "%s" does not exist in the DB.' % group_name)

    # Construct list of file copies that are available on the source node, and
    # not available on any nodes at the destination. This query is quite complex
    # so I've broken it up...

    # First get the nodes at the destination...
    nodes_at_dest = di.StorageNode.select().where(di.StorageNode.group == to_group)

    # Then use this to get a list of all files at the destination...
    files_at_dest = (
        di.ArchiveFile.select()
        .join(di.ArchiveFileCopy)
        .where(
            di.ArchiveFileCopy.node << nodes_at_dest, di.ArchiveFileCopy.has_file == "Y"
        )
    )

    # Then combine to get all file(copies) that are available at the source but
    # not at the destination...
    copy = di.ArchiveFileCopy.select().where(
        di.ArchiveFileCopy.node == from_node,
        di.ArchiveFileCopy.has_file == "Y",
        ~(di.ArchiveFileCopy.file << files_at_dest),
    )

    # If the target option has been specified, only copy files also not
    # available there...
    if target is not None:
        # Fetch a reference to the target group
        try:
            target_group = di.StorageGroup.get(name=target)
        except pw.DoesNotExist:
            raise db.NotFoundError(
                'Target group "%s" does not exist in the DB.' % target
            )

        # First get the nodes at the target...
        nodes_at_target = di.StorageNode.select().where(
            di.StorageNode.group == target_group
        )

        # Then use this to get a list of all files at the target...
        files_at_target = (
            di.ArchiveFile.select()
            .join(di.ArchiveFileCopy)
            .where(
                di.ArchiveFileCopy.node << nodes_at_target,
                di.ArchiveFileCopy.has_file == "Y",
            )
        )

        # Only match files that are also not available at the target
        copy = copy.where(~(di.ArchiveFileCopy.file << files_at_target))

    # In transport mode (DEPRECATED) we only move files that don't have an
    # archive copy elsewhere...
    if transport:
        import warnings

        warnings.warn("Transport mode is deprecated. Try to use --target instead.")

        # Get list of other archive nodes
        other_archive_nodes = di.StorageNode.select().where(
            di.StorageNode.storage_type == "A", di.StorageNode.id != from_node
        )

        files_in_archive = (
            di.ArchiveFile.select()
            .join(di.ArchiveFileCopy)
            .where(
                di.ArchiveFileCopy.node << other_archive_nodes,
                di.ArchiveFileCopy.has_file == "Y",
            )
        )

        copy = copy.where(~(di.ArchiveFileCopy.file << files_in_archive))

    # Join onto ArchiveFile for later query parts
    copy = copy.join(di.ArchiveFile)

    # If requested, limit query to a specific acquisition...
    if acq is not None:
        # Fetch acq if specified
        try:
            acq = di.ArchiveAcq.get(name=acq)
        except pw.DoesNotExist:
            raise db.NotFoundError('Acquisition "%s" does not exist in the DB.' % acq)

        # Restrict files to be in the acquisition
        copy = copy.where(di.ArchiveFile.acq == acq)

    if not copy.count():
        print("No files to copy from node %s." % (node_name))
        return

    # Show acquisitions based summary of files to be copied
    if show_acq:
        acqs = [c.file.acq.name for c in copy]

        import collections

        for acq, count in collections.Counter(acqs).items():
            print("%s [%i files]" % (acq, count))

    # Show all files to be copied
    if show_files:
        for c in copy:
            print("%s/%s" % (c.file.acq.name, c.file.name))

    size_bytes = copy.select(pw.fn.Sum(di.ArchiveFile.size_b)).scalar()
    size_gb = int(size_bytes) / 1073741824.0

    print(
        "Will request that %d files (%.1f GB) be copied from node %s to group %s."
        % (copy.count(), size_gb, node_name, group_name)
    )

    if not (force or click.confirm("Do you want to proceed?")):
        print("Aborted.")
        return

    dtnow = datetime.datetime.now()

    # Perform update in a transaction to avoid any clobbering from concurrent updates
    with di.ArchiveFileCopyRequest._meta.database.atomic():
        # Get a list of all the file ids for the copies we should perform
        files_ids = [c.file_id for c in copy]

        # Get the file ids of existing requests; a set keeps the membership
        # tests below O(1) each.
        requests = di.ArchiveFileCopyRequest.select().where(
            di.ArchiveFileCopyRequest.group_to == to_group,
            di.ArchiveFileCopyRequest.node_from == from_node,
        )
        req_file_ids = {req.file_id for req in requests}

        # Separate the files into ones that already have requests and ones that don't
        files_in = [x for x in files_ids if x in req_file_ids]
        files_out = [x for x in files_ids if x not in req_file_ids]

        sys.stdout.write(
            "Updating %i existing requests and inserting %i new ones.\n"
            % (len(files_in), len(files_out))
        )

        # Perform an update of all the existing copy requests
        if len(files_in) > 0:
            update = di.ArchiveFileCopyRequest.update(
                nice=nice,
                completed=False,
                cancelled=False,
                timestamp=dtnow,
                n_requests=di.ArchiveFileCopyRequest.n_requests + 1,
            )

            update = update.where(
                di.ArchiveFileCopyRequest.file << files_in,
                di.ArchiveFileCopyRequest.group_to == to_group,
                di.ArchiveFileCopyRequest.node_from == from_node,
            )
            update.execute()

        # Insert any new requests
        if len(files_out) > 0:
            # Honour the requested niceness for new rows as the update above
            # does; fall back to the previous hard-coded 0 when none is given.
            new_nice = 0 if nice is None else nice

            # Construct a list of all the rows to insert
            insert = [
                {
                    "file": fid,
                    "node_from": from_node,
                    "nice": new_nice,
                    "group_to": to_group,
                    "completed": False,
                    "n_requests": 1,
                    "timestamp": dtnow,
                }
                for fid in files_out
            ]

            # Do a bulk insert of these new rows
            di.ArchiveFileCopyRequest.insert_many(insert).execute()