Example 1
def activate(name, path, user, address, hostname):
    """Interactive routine for activating a storage node located at ROOT."""

    # We need to write to the database.
    db.connect(read_write=True)

    try:
        node = di.StorageNode.get(name=name)
    except pw.DoesNotExist:
        print('Storage node "%s" does not exist. I quit.' % name)
        return

    if node.active:
        print('Node "%s" is already active.' % name)
        return

    # Set the default hostname if required
    if hostname is None:
        hostname = socket.gethostname().split(".")[0]
        print('I will set the host to "%s".' % hostname)

    # Set the parameters of this node
    node.username = user
    node.address = address
    node.active = True
    node.host = hostname

    if path is not None:
        node.root = path

    node.save()

    print('Successfully activated "%s".' % name)
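
The routine above reads like a CLI subcommand. Below is a minimal sketch of how it might be wired up with click; the command registration and option flags are illustrative assumptions, not the project's actual CLI definition.

import click


@click.command("activate")
@click.argument("name")
@click.option("--path", default=None, help="Root path of the node.")
@click.option("--user", default=None, help="Username used to access the node.")
@click.option("--address", default=None, help="Address used to access the node.")
@click.option("--hostname", default=None, help="Host the node is mounted on.")
def activate_cmd(name, path, user, address, hostname):
    # Delegate to the activate() routine above; it handles the defaults.
    activate(name, path, user, address, hostname)
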
Example 2
def db_conn():
    """Set up chimedb.core for testing with a local dummy DB."""
    (fd, rcfile) = tempfile.mkstemp(text=True)
    with os.fdopen(fd, "a") as rc:
        rc.write("""\
        chimedb:
            db_type:         MySQL
            db:              test
            user_ro:         test
            passwd_ro:       test
            user_rw:         test
            passwd_rw:       test
            host:            127.0.0.1
            port:            32574
        """)

    # Tell chimedb where the database connection config is
    assert os.path.isfile(rcfile), "Could not find {}.".format(rcfile)
    os.environ["CHIMEDB_TEST_RC"] = rcfile

    # Make sure we don't write to the actual chime database
    os.environ["CHIMEDB_TEST_ENABLE"] = "Yes, please."

    db.connect()
    db.orm.create_tables(["chimedb.dataflag.opinion"])

    # insert a user with password ******
    pwd = ":B:0000ffff:e989651ffffcb5bf9b9abedfdab58460"
    db.mediawiki.MediaWikiUser.get_or_create(user_name=user, user_password=pwd)
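
The fixture keeps tests away from the production database through two environment variables. Here is a standalone sketch of just that mechanism, using only what the example itself shows; the helper name is hypothetical.

import os
import tempfile


def point_chimedb_at_test_rc(rc_text):
    # Hypothetical helper: write a throwaway rcfile and export the two test
    # variables that the fixtures in these examples rely on.
    fd, rcfile = tempfile.mkstemp(text=True)
    with os.fdopen(fd, "w") as rc:
        rc.write(rc_text)
    os.environ["CHIMEDB_TEST_RC"] = rcfile
    os.environ["CHIMEDB_TEST_ENABLE"] = "Yes, please."
    return rcfile
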
Example 3
def deactivate(root_or_name):
    """Deactivate a storage node with location or named ROOT_OR_NAME."""

    # We need to write to the database.
    db.connect(read_write=True)

    try:
        node = di.StorageNode.get(name=root_or_name)
    except pw.DoesNotExist:
        if root_or_name[-1] == "/":
            root_or_name = root_or_name[:-1]

        if not os.path.exists(root_or_name):
            print("That is neither a node name, nor a path on this host. " "I quit.")
            exit()
        try:
            node = di.StorageNode.get(
                root=root_or_name, host=socket.gethostname().split(".")[0]
            )
        except pw.DoesNotExist:
            print(
                "That is neither a node name nor a root name that is " "known. I quit."
            )
            exit()

    if not node.active:
        print("There is no node active there any more.")
    else:
        node.active = False
        node.save()
        print("Node successfully deactivated.")
Example 4
    def setUp(self):
        """Set up chimedb.core for testing with a local dummy DB."""
        (fd, rcfile) = tempfile.mkstemp(text=True)
        with os.fdopen(fd, "a") as rc:
            rc.write("""\
            chimedb:
                db_type:         MySQL
                db:              test
                user_ro:         travis
                passwd_ro:       ""
                user_rw:         travis
                passwd_rw:       ""
                host:            127.0.0.1
                port:            3306
            """)

        # Tell chimedb where the database connection config is
        assert os.path.isfile(rcfile), "Could not find {}.".format(rcfile)
        os.environ["CHIMEDB_TEST_RC"] = rcfile

        # Make sure we don't write to the actual chime database
        os.environ["CHIMEDB_TEST_ENABLE"] = "Yes, please."

        db.connect()

        db.orm.create_tables("chimedb.dataset")
        dget.index()
Example 5
def db_conn():
    """Set up chimedb.core for testing with a local dummy DB."""
    (fd, rcfile) = tempfile.mkstemp(text=True)
    os.close(fd)

    # Tell chimedb where the database connection config is
    assert os.path.isfile(rcfile), "Could not find {}.".format(rcfile)

    os.environ["CHIMEDB_TEST_SQLITE"] = rcfile
    # Make sure we don't write to the actual chime database
    os.environ["CHIMEDB_TEST_ENABLE"] = "Yes, please."

    db.connect()
    db.orm.create_tables(["chimedb.dataflag.opinion"])

    # insert a user
    pwd = ":B:0000ffff:e989651ffffcb5bf9b9abedfdab58460"
    db.mediawiki.MediaWikiUser.get_or_create(user_id=user_id,
                                             user_name=user,
                                             user_password=pwd)

    # insert a user with a password hash we don't understand
    pwd = "1 2 3 4"
    db.mediawiki.MediaWikiUser.get_or_create(user_id=1,
                                             user_name=fail_user,
                                             user_password=pwd)
    db.close()

    yield

    # tear down
    os.remove(rcfile)
Example 6
    def test_chimedb_test_rc(self):
        # Create an empty on-disk sqlite database
        (fd, dbfile) = tempfile.mkstemp(text=True)
        os.close(fd)

        # Create a rcfile
        (fd, rcfile) = tempfile.mkstemp(text=True)
        with os.fdopen(fd, "a") as rc:
            rc.write("""\
chimedb:
    db_type: sqlite
    db: {0}
""".format(dbfile))

        # This should be ignored
        os.environ["CHIMEDB_TEST_RC"] = rcfile

        db.test_enable()

        db.connect(read_write=True)
        db.proxy.create_tables([TestTable])
        TestTable.create(datum=datum_value)

        # Did that work?
        self.assertEqual(
            TestTable.select(TestTable.datum).scalar(), datum_value)
        db.close()

        # The on-disk sqlite database should not be empty
        stat = os.stat(dbfile)
        self.assertNotEqual(stat.st_size, 0)

        os.unlink(rcfile)
        os.unlink(dbfile)
Example 7
    def test_no_chimedbrc(self):
        # This is not allowed
        os.environ["CHIMEDB_TEST_RC"] = 'any string containing "chimedbrc"'

        db.test_enable()

        with self.assertRaises(OSError):
            db.connect()
Example 8
def import_file(node, root, acq_name, file_name):
    done = False
    while not done:
        try:
            _import_file(node, root, acq_name, file_name)
            done = True
        except pw.OperationalError:
            log.error(
                "MySQL connexion dropped. Will attempt to reconnect in five seconds."
            )
            time.sleep(5)
            db.connect(read_write=True, reconnect=True)
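
The same reconnect-and-retry loop, factored into a generic helper. This is a sketch only, assuming `db` is `chimedb.core` as in the other examples; the `wait` parameter is an addition for illustration.

import logging
import time

import peewee as pw

from chimedb import core as db

log = logging.getLogger(__name__)


def retry_on_dropped_connection(func, *args, wait=5, **kwargs):
    # Keep calling func until it succeeds, reconnecting read-write after
    # every dropped MySQL connection, as the loop in import_file does.
    while True:
        try:
            return func(*args, **kwargs)
        except pw.OperationalError:
            log.error("MySQL connexion dropped. Will attempt to reconnect "
                      "in %d seconds.", wait)
            time.sleep(wait)
            db.connect(read_write=True, reconnect=True)
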
Example 9
    def test_atomic_autocommit(self):
        @db.atomic(read_write=True)
        def inside_atomic():
            TestTable.update(datum=datum_value + 1).execute()

        # Execute
        inside_atomic()

        # Check
        db.close()
        db.connect()
        self.assertEqual(
            TestTable.select(TestTable.datum).scalar(), datum_value + 1)
Example 10
def status(all):
    """Summarise the status of alpenhorn storage nodes."""

    import tabulate

    db.connect()

    # Data to fetch from the database (node name, total files, total size)
    query_info = (
        di.StorageNode.name,
        pw.fn.Count(di.ArchiveFileCopy.id).alias("count"),
        pw.fn.Sum(di.ArchiveFile.size_b).alias("total_size"),
        di.StorageNode.host,
        di.StorageNode.root,
    )

    # Per node totals
    nodes = (
        di.StorageNode.select(*query_info)
        .join(di.ArchiveFileCopy)
        .where(di.ArchiveFileCopy.has_file == "Y")
        .join(di.ArchiveFile)
        .group_by(di.StorageNode)
        .order_by(di.StorageNode.name)
    )

    if not all:
        nodes = nodes.where(di.StorageNode.active)

    # Totals for the whole archive
    tot = di.ArchiveFile.select(
        pw.fn.Count(di.ArchiveFile.id).alias("count"),
        pw.fn.Sum(di.ArchiveFile.size_b).alias("total_size"),
    ).scalar(as_tuple=True)

    data = [
        [
            node[0],
            int(node[1]),
            int(node[2]) / 2 ** 40.0,
            100.0 * int(node[1]) / int(tot[0]),
            100.0 * int(node[2]) / int(tot[1]),
            "%s:%s" % (node[3], node[4]),
        ]
        for node in nodes.tuples()
    ]

    headers = ["Node", "Files", "Size [TB]", "Files [%]", "Size [%]", "Path"]

    print(tabulate.tabulate(data, headers=headers, floatfmt=".1f"))
Example 11
    def test_atomic_raise(self):
        @db.atomic(read_write=True)
        def inside_atomic():
            TestTable.update(datum=datum_value + 1).execute()

            raise RuntimeError

        # Execute
        with self.assertRaises(RuntimeError):
            inside_atomic()

        # Check
        db.close()
        db.connect()
        self.assertEqual(
            TestTable.select(TestTable.datum).scalar(), datum_value)
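
Taken together, Examples 9 and 11 show that `db.atomic(read_write=True)` commits on a normal return and rolls back when the wrapped function raises. A sketch of using it directly, written as if it lived in the same test module so that `db` and `TestTable` are available.

@db.atomic(read_write=True)
def bump_datum(by):
    # Commits on return; rolls back automatically if anything inside raises.
    TestTable.update(datum=TestTable.datum + by).execute()


bump_datum(1)  # the update is applied and committed, as in Example 9
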
Example 12
    def __init__(self):

        super(FilterExisting, self).__init__()

        self.csd_list = []
        self.corr_files = {}

        if mpiutil.rank0:
            # Look for CSDs in the current directory
            import glob

            files = glob.glob("*")
            if self.existing_csd_regex:
                for file_ in files:
                    mo = re.search(self.existing_csd_regex, file_)
                    if mo is not None:
                        self.csd_list.append(int(mo.group(1)))

            # Search the database to get the start and end times of all correlation files
            from chimedb import data_index as di
            from chimedb.core import connect
            from ch_util import ephemeris

            connect()
            query = (di.ArchiveFile.select(
                di.ArchiveAcq.name,
                di.ArchiveFile.name,
                di.CorrFileInfo.start_time,
                di.CorrFileInfo.finish_time,
            ).join(di.ArchiveAcq).switch(di.ArchiveFile).join(di.CorrFileInfo))

            for acq, fname, start, finish in query.tuples():

                if start is None or finish is None:
                    continue

                start_csd = ephemeris.csd(start)
                finish_csd = ephemeris.csd(finish)

                name = os.path.join(acq, fname)
                self.corr_files[name] = (start_csd, finish_csd)

            self.log.debug("Skipping existing CSDs %s", repr(self.csd_list))

        # Broadcast results to other ranks
        self.corr_files = mpiutil.world.bcast(self.corr_files, root=0)
        self.csd_list = mpiutil.world.bcast(self.csd_list, root=0)
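
The rank-0-computes-then-broadcast pattern used above, shown standalone with mpi4py rather than the `mpiutil` wrapper; the dictionary contents are made up.

from mpi4py import MPI

comm = MPI.COMM_WORLD

corr_files = None
if comm.rank == 0:
    # Only rank 0 does the expensive database / filesystem lookups ...
    corr_files = {"some_acq/some_file.h5": (1234.0, 1235.0)}

# ... then every rank receives the same result.
corr_files = comm.bcast(corr_files, root=0)
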
Example 13
def active(host):
    """list the nodes active on this, or another specified, machine"""

    db.connect()

    if host is None:
        host = socket.gethostname().split(".")[0]
    zero = True
    for node in di.StorageNode.select().where(
        di.StorageNode.host == host, di.StorageNode.active == True
    ):
        n_file = (
            di.ArchiveFileCopy.select().where(di.ArchiveFileCopy.node == node).count()
        )
        print("%-25s %-30s %5d files" % (node.name, node.root, n_file))
        zero = False
    if zero:
        print("No nodes are active on host %s." % host)
Example 14
    def __init__(
        self,
        broker_host,
        broker_port,
        redis_host,
        redis_port,
        log_level,
        failure_wait_time,
    ):
        logger.setLevel(log_level)

        # convert ms to s
        self.failure_wait_time = failure_wait_time / 1000

        startup_time = datetime.datetime.utcnow()
        config = {
            "broker_host": broker_host,
            "broker_port": broker_port,
            "redis_host": redis_host,
            "redis_port": redis_port,
            "log_level": log_level,
            "failure_wait_time": failure_wait_time,
        }

        manager = Manager(broker_host, broker_port)
        try:
            manager.register_start(startup_time, __version__, config)
        except (CometError, ConnectionError) as exc:
            logger.error(
                "Comet archiver failed registering its startup and initial config: {}"
                .format(exc))
            exit(1)

        # Open database connection
        chimedb.connect(read_write=True)

        # Create any missing table.
        chimedb.orm.create_tables("chimedb.dataset")

        # Open connection to redis
        self.redis = redis.Redis(redis_host,
                                 redis_port,
                                 encoding="utf-8",
                                 decode_responses=True)
Example 15
    def test_chimedb_test_sqlite(self):
        # Create an empty on-disk sqlite database
        (fd, dbfile) = tempfile.mkstemp(text=True)
        os.close(fd)

        os.environ["CHIMEDB_TEST_SQLITE"] = dbfile

        db.test_enable()

        db.connect(read_write=True)
        db.proxy.create_tables([TestTable])
        TestTable.create(datum=datum_value)

        # Did that work?
        self.assertEqual(
            TestTable.select(TestTable.datum).scalar(), datum_value)
        db.close()

        # The on-disk sqlite database should not be empty anymore
        stat = os.stat(dbfile)
        self.assertNotEqual(stat.st_size, 0)
Example 16
def cli():
    """Alpenhorn data management service."""

    # We need write access to the DB.
    db.connect(read_write=True)

    # Get the name of this host
    host = socket.gethostname().split(".")[0]

    # Get the list of nodes currently mounted
    node_list = list(di.StorageNode.select().where(di.StorageNode.host == host,
                                                   di.StorageNode.active))

    # Warn if there are no mounted nodes. We used to exit here, but actually
    # it's useful to keep alpenhornd running for nodes where we exclusively use
    # transport disks (e.g. jingle)
    if len(node_list) == 0:
        log.warn('No nodes on this host ("%s") registered in the DB!' % host)

    # Load the cache of already imported files
    auto_import.load_import_cache()

    # Setup the observers to watch the nodes for new files
    auto_import.setup_observers(node_list)

    # Enter main loop performing node updates
    try:
        update.update_loop(host)

    # Exit cleanly on a keyboard interrupt
    except KeyboardInterrupt:
        log.info("Exiting...")
        auto_import.stop_observers()

    # Wait for watchdog threads to terminate
    auto_import.join_observers()
Example 17
def verify(node_name, md5, fixdb, acq):
    """Verify the archive on NODE against the database."""

    db.connect()

    try:
        this_node = di.StorageNode.get(di.StorageNode.name == node_name)
    except pw.DoesNotExist:
        print("Specified node does not exist.")
        return

    ## Use a complicated query with a tuples construct to fetch everything we
    ## need in a single query. This massively speeds up the whole process versus
    ## fetching all the FileCopy's then querying for Files and Acqs.
    lfiles = (
        di.ArchiveFile.select(
            di.ArchiveFile.name,
            di.ArchiveAcq.name,
            di.ArchiveFile.size_b,
            di.ArchiveFile.md5sum,
            di.ArchiveFileCopy.id,
        )
        .join(di.ArchiveAcq)
        .switch(di.ArchiveFile)
        .join(di.ArchiveFileCopy)
        .where(di.ArchiveFileCopy.node == this_node, di.ArchiveFileCopy.has_file == "Y")
        .tuples()
    )

    missing_files = []
    corrupt_files = []

    missing_ids = []
    corrupt_ids = []

    nfiles = 0

    with click.progressbar(lfiles, label="Scanning files") as lfiles_iter:
        for filename, acqname, filesize, md5sum, fc_id in lfiles_iter:

            # Skip if not in specified acquisitions
            if len(acq) > 0 and acqname not in acq:
                continue

            nfiles += 1

            filepath = this_node.root + "/" + acqname + "/" + filename

            # Check if file is plain missing
            if not os.path.exists(filepath):
                missing_files.append(filepath)
                missing_ids.append(fc_id)
                continue

            if md5:
                file_md5 = di.util.md5sum_file(filepath)
                corrupt = file_md5 != md5sum
            else:
                corrupt = os.path.getsize(filepath) != filesize

            if corrupt:
                corrupt_files.append(filepath)
                corrupt_ids.append(fc_id)
                continue

    if len(missing_files) > 0:
        print()
        print("=== Missing files ===")
        for fname in missing_files:
            print(fname)

    if len(corrupt_files) > 0:
        print()
        print("=== Corrupt files ===")
        for fname in corrupt_files:
            print(fname)

    print()
    print("=== Summary ===")
    print("  %i total files" % nfiles)
    print("  %i missing files" % len(missing_files))
    print("  %i corrupt files" % len(corrupt_files))
    print()

    # Fix up the database by marking files as missing, and marking
    # corrupt files for verification by alpenhornd.
    if fixdb:

        # Make sure we connect RW
        db.connect(read_write=True)

        if (len(missing_files) > 0) and click.confirm("Fix missing files"):
            missing_count = (
                di.ArchiveFileCopy.update(has_file="N")
                .where(di.ArchiveFileCopy.id << missing_ids)
                .execute()
            )
            print("  %i marked as missing" % missing_count)

        if (len(corrupt_files) > 0) and click.confirm("Fix corrupt files"):
            corrupt_count = (
                di.ArchiveFileCopy.update(has_file="M")
                .where(di.ArchiveFileCopy.id << corrupt_ids)
                .execute()
            )
            print("  %i corrupt files marked for verification" % corrupt_count)
Example 18
def _import_file(node, root, acq_name, file_name):
    """Import a file into the DB.

    This routine adds the following to the database, if they do not already exist
    (or might be corrupted).
    - The acquisition that the file is a part of.
    - Information on the acquisition, if it is of type "corr".
    - The file.
    - Information on the file, if it is of type "corr".
    - Indicates that the file exists on this node.
    """
    global import_done
    curr_done = True
    fullpath = "%s/%s/%s" % (root, acq_name, file_name)
    log.debug("Considering %s for import." % fullpath)

    # Skip the file if ch_master.py still has a lock on it.
    if os.path.isfile("%s/%s/.%s.lock" % (root, acq_name, file_name)):
        log.debug('Skipping "%s", which is locked by ch_master.py.' % fullpath)
        return

    # Parse the path
    try:
        ts, inst, atype = di.util.parse_acq_name(acq_name)
    except db.ValidationError:
        log.info("Skipping non-acquisition path %s." % acq_name)
        return

    if import_done is not None:
        i = bisect.bisect_left(import_done, fullpath)
        if i != len(import_done) and import_done[i] == fullpath:
            log.debug("Skipping already-registered file %s." % fullpath)
            return

    # Figure out which acquisition this is; add if necessary.
    try:
        acq = di.ArchiveAcq.get(di.ArchiveAcq.name == acq_name)
        log.debug('Acquisition "%s" already in DB. Skipping.' % acq_name)
    except pw.DoesNotExist:
        acq = add_acq(acq_name)
        if acq is None:
            return
        log.info('Acquisition "%s" added to DB.' % acq_name)

    # What kind of file do we have?
    ftype = di.util.detect_file_type(file_name)
    if ftype is None:
        log.info('Skipping unrecognised file "%s/%s".' % (acq_name, file_name))
        return

    # Make sure information about the acquisition exists in the DB.
    if atype == "corr" and ftype.name == "corr":
        if not acq.corrinfos.count():
            try:
                di.CorrAcqInfo.create(
                    acq=acq, **get_acqcorrinfo_keywords_from_h5(fullpath))
                log.info(
                    'Added information for correlator acquisition "%s" to '
                    "DB." % acq_name)
            except:
                log.warning('Missing info for acquisition "%s": HDF5 datasets '
                            "empty. Leaving fields NULL." % (acq_name))
                di.CorrAcqInfo.create(acq=acq)
    elif atype == "hfb" and ftype.name == "hfb":
        if not acq.hfbinfos.count():
            try:
                di.HFBAcqInfo.create(
                    acq=acq, **get_acqhfbinfo_keywords_from_h5(fullpath))
                log.info('Added information for HFB acquisition "%s" to '
                         "DB." % acq_name)
            except:
                log.warning('Missing info for acquisition "%s": HDF5 datasets '
                            "empty. Leaving fields NULL." % (acq_name))
                di.HFBAcqInfo.create(acq=acq)
    elif atype == "hk" and ftype.name == "hk":
        try:
            keywords = get_acqhkinfo_keywords_from_h5("%s/%s" %
                                                      (root, acq_name))
        except:
            log.warning("Could no open atmel_id.dat file. Skipping.")
            keywords = []
        for kw in keywords:
            if not sum(1 for _ in di.HKAcqInfo.select().where(
                    di.HKAcqInfo.acq == acq).where(
                        di.HKAcqInfo.atmel_name == kw["atmel_name"])):
                try:
                    di.HKAcqInfo.create(acq=acq, **kw)
                    log.info(
                        'Added information for housekeeping acquisition "%s", '
                        "board %s to DB." % (acq_name, kw["atmel_name"]))
                except:
                    log.warning(
                        'Missing info for acquisition "%s": atmel_id.dat '
                        "file missing or corrupt. Skipping this acquisition." %
                        acq_name)
                    return
    elif atype == "rawadc":
        if not acq.rawadcinfos.count():
            di.RawadcAcqInfo.create(
                acq=acq, **get_acqrawadcinfo_keywords_from_h5(acq_name))
            log.info('Added information for raw ADC acquisition "%s" to '
                     "DB." % acq_name)

    # Add the file, if necessary.
    try:
        file = di.ArchiveFile.get(di.ArchiveFile.name == file_name,
                                  di.ArchiveFile.acq == acq)
        size_b = file.size_b
        log.debug('File "%s/%s" already in DB. Skipping.' %
                  (acq_name, file_name))
    except pw.DoesNotExist:
        log.debug("Computing md5sum.")
        md5sum = di.util.md5sum_file(fullpath, cmd_line=True)
        size_b = os.path.getsize(fullpath)
        done = False
        while not done:
            try:
                file = di.ArchiveFile.create(acq=acq,
                                             type=ftype,
                                             name=file_name,
                                             size_b=size_b,
                                             md5sum=md5sum)
                done = True
            except pw.OperationalError:
                log.error(
                    "MySQL connexion dropped. Will attempt to reconnect in "
                    "five seconds.")
                time.sleep(5)
                db.connect(read_write=True, reconnect=True)
        log.info('File "%s/%s" added to DB.' % (acq_name, file_name))

    # Register the copy of the file here on the collection server, if (1) it does
    # not exist, or (2) it does exist but has been labelled as corrupt. If (2),
    # check again.
    # Use a transaction to avoid race condition
    with db.proxy.transaction():
        if not file.copies.where(di.ArchiveFileCopy.node == node).count():
            copy = di.ArchiveFileCopy.create(file=file,
                                             node=node,
                                             has_file="Y",
                                             wants_file="Y")
            log.info('Registered file copy "%s/%s" to DB.' %
                     (acq_name, file_name))

    # Make sure information about the file exists in the DB.
    if ftype.name == "corr":
        # Add if (1) there is no corrinfo or (2) the corrinfo is missing.
        if not file.corrinfos.count():
            try:
                di.CorrFileInfo.create(
                    file=file, **get_filecorrinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.corrinfos.count():
                    di.CorrFileInfo.create(file=file)
                log.warning('Missing info for file "%s/%s": HDF5 datasets '
                            "empty or unreadable. Leaving fields NULL." %
                            (acq_name, file_name))
        elif not file.corrinfos[0].start_time:
            try:
                i = file.corrinfos[0]
                k = get_filecorrinfo_keywords_from_h5(fullpath)
            except:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.chunk_number = k["chunk_number"]
                i.freq_number = k["freq_number"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
    elif ftype.name == "hfb":
        # Add if (1) there is no corrinfo or (2) the corrinfo is missing.
        if not file.hfbinfos.count():
            try:
                di.HFBFileInfo.create(
                    file=file, **get_filehfbinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.hfbinfos.count():
                    di.HFBFileInfo.create(file=file)
                log.warning('Missing info for file "%s/%s": HDF5 datasets '
                            "empty or unreadable. Leaving fields NULL." %
                            (acq_name, file_name))
        elif not file.hfbinfos[0].start_time:
            try:
                i = file.hfbinfos[0]
                k = get_filehfbinfo_keywords_from_h5(fullpath)
            except:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.chunk_number = k["chunk_number"]
                i.freq_number = k["freq_number"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
    elif ftype.name == "hk":
        # Add if (1) there is no hkinfo or (2) the hkinfo is missing.
        if not file.hkinfos.count():
            try:
                di.HKFileInfo.create(
                    file=file, **get_filehkinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.hkinfos.count():
                    di.HKFileInfo.create(file=file)
                log.warning('Missing info for file "%s/%s": HDF5 datasets '
                            "empty or unreadable. Leaving fields NULL." %
                            (acq_name, file_name))
        elif not file.hkinfos[0].start_time:
            try:
                i = file.hkinfos[0]
                k = get_filehkinfo_keywords_from_h5(fullpath)
            except:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.atmel_name = k["atmel_name"]
                i.chunk_number = k["chunk_number"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
    elif ftype.name == "weather":
        # Add if (1) there is no weatherinfo or (2) the weatherinfo is missing.
        if not file.weatherinfos.count():
            #      try:
            di.WeatherFileInfo.create(
                file=file, **get_fileweatherinfo_keywords_from_h5(fullpath))
            log.info('Added information for file "%s/%s" to DB.' %
                     (acq_name, file_name))
        #      except:
        #        if not file.corrinfos.count():
        #          di.WeatherFileInfo.create(file=file)
        #        log.warning("Missing info for file \"%s/%s\": HDF5 datasets " \
        #                    "empty or unreadable. Leaving fields NULL." %
        #                    (acq_name, file_name))
        elif not file.weatherinfos[0].start_time:
            try:
                i = file.weatherinfos[0]
                k = get_fileweatherinfo_keywords_from_h5(fullpath)
            except:
                log.debug('Still missing info for file "%s/%s".' %
                          (acq_name, file_name))
            else:
                i.start_time = k["start_time"]
                i.finish_time = k["finish_time"]
                i.date = k["date"]
                i.save()
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))

    elif ftype.name == "rawadc":
        # Add if there is no rawadcinfo
        if not file.rawadcinfos.count():
            try:
                di.RawadcFileInfo.create(
                    file=file, **get_filerawadcinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.rawadcinfos.count():
                    di.RawadcFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))

    elif ftype.name == "hkp":
        # Add if there is no hkpinfo
        if not file.hkpinfos.count():
            try:
                di.HKPFileInfo.create(
                    file=file, **get_filehkpinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.hkpinfos.count():
                    di.HKPFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif atype == "digitalgain" and ftype.name == "calibration":
        if not file.digitalgaininfos.count():
            try:
                di.DigitalGainFileInfo.create(
                    file=file,
                    **get_filedigitalgaininfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.digitalgaininfos.count():
                    di.DigitalGainFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif atype == "gain" and ftype.name == "calibration":
        if not file.calibrationgaininfos.count():
            try:
                di.CalibrationGainFileInfo.create(
                    file=file,
                    **get_filecalibrationgaininfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.calibrationgaininfos.count():
                    di.CalibrationGainFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))
    elif atype == "flaginput" and ftype.name == "calibration":
        if not file.flaginputinfos.count():
            try:
                di.FlagInputFileInfo.create(
                    file=file,
                    **get_fileflaginputinfo_keywords_from_h5(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))
            except:
                if not file.flaginputinfos.count():
                    di.FlagInputFileInfo.create(file=file)
                log.warning(
                    'Missing info for file "%s/%s". Leaving fields NULL.' %
                    (acq_name, file_name))

    elif atype == "misc" and ftype.name == "miscellaneous":
        with db.proxy.atomic():
            if not file.miscfileinfos.count():
                di.MiscFileInfo.create(file=file,
                                       **get_miscfile_data(fullpath))
                log.info('Added information for file "%s/%s" to DB.' %
                         (acq_name, file_name))

    if import_done is not None:
        bisect.insort_left(import_done, fullpath)
        with open(LOCAL_IMPORT_RECORD, "w") as fp:
            fp.write("\n".join(import_done))
Example 19
    def _available_files(self, start_csd, end_csd):
        """
        Return chimestack files available in cedar_online between start_csd and
        end_csd, if all of the files for that period are available online.

        Return an empty list if files between start_csd and end_csd are only partially available online.

        Total file count is verified by checking files that exist everywhere.

        Parameters
        ----------
        start_csd : int
            Start date in sidereal day format
        end_csd : int
            End date in sidereal day format

        Returns
        -------
        tuple of lists
            Two lists: the chimestack files available online in the timespan,
            and the chimestack files known to exist anywhere for that timespan.

        """

        # Connect to databases
        db.connect()

        # Get timestamps in unix format
        # Needed for queries
        start_time = ephemeris.csd_to_unix(start_csd)
        end_time = ephemeris.csd_to_unix(end_csd)

        # We will want to know which files are in chime_online and nearline on cedar
        online_node = di.StorageNode.get(name="cedar_online", active=True)
        chimestack_inst = di.ArchiveInst.get(name="chimestack")

        # TODO if the time range is so small that it’s completely contained within a single file, nothing will be returned
        # have to special-case it by looking for files which start before the start time and end after the end time).

        archive_files = (di.ArchiveFileCopy.select(
            di.CorrFileInfo.start_time,
            di.CorrFileInfo.finish_time,
        ).join(di.ArchiveFile).join(di.ArchiveAcq).switch(di.ArchiveFile).join(
            di.CorrFileInfo))

        # chimestack files available online which include between start and end_time

        files_that_exist = archive_files.where(
            di.ArchiveAcq.inst ==
            chimestack_inst,  # specifically looking for chimestack files
            di.CorrFileInfo.start_time <
            end_time,  # which contain data that includes start time and end time
            di.CorrFileInfo.finish_time >= start_time,
            di.ArchiveFileCopy.has_file == "Y",
        )

        files_online = files_that_exist.where(
            di.ArchiveFileCopy.node == online_node,  # that are online
        )

        filenames_online = sorted([t for t in files_online.tuples()])

        # files_that_exist might contain the same file multiple times
        # if it exists in multiple locations (nearline, online, gossec, etc)
        # we only want to include it once
        filenames_that_exist = sorted(
            list(set(t for t in files_that_exist.tuples())))

        return filenames_online, filenames_that_exist
Example 20
    def _create_hook(self):
        """Create the revision.

        This tries to determine which days are good and bad, and partitions the
        available good days into the individual stacks.
        """

        days = {}

        core.connect()

        # Go over each revision and construct the set of LSDs we should stack, and save
        # the path to each.
        # NOTE: later entries in `daily_revisions` will override LSDs found in earlier
        # revisions.
        for rev in self.default_params["daily_revisions"]:

            daily_path = (
                self.root_path
                if self.default_params["daily_root"] is None
                else self.default_params["daily_root"]
            )
            daily_rev = daily.DailyProcessing(rev, root_path=daily_path)

            # Get all the bad days in this revision
            revision = df.DataRevision.get(name=rev)
            query = (
                df.DataFlagOpinion.select(df.DataFlagOpinion.lsd)
                .distinct()
                .where(
                    df.DataFlagOpinion.revision == revision,
                    df.DataFlagOpinion.decision == "bad",
                )
            )
            bad_days = [x[0] for x in query.tuples()]

            # Get all the good days
            query = (
                df.DataFlagOpinion.select(df.DataFlagOpinion.lsd)
                .distinct()
                .where(
                    df.DataFlagOpinion.revision == revision,
                    df.DataFlagOpinion.decision == "good",
                )
            )
            good_days = [x[0] for x in query.tuples()]

            for d in daily_rev.ls():

                # Filter out known bad days here
                if (int(d) in bad_days) or (int(d) not in good_days):
                    continue

                # Insert the day and path into the dict, this will replace the entries
                # from prior revisions
                path = daily_rev.base_path / d
                lsd = int(d)
                days[lsd] = path

        lsds = sorted(days)

        # Map each LSD into the quarter it belongs in and find which quarters we have
        # data for
        dates = ctime.unix_to_datetime(ephemeris.csd_to_unix(np.array(lsds)))
        yq = np.array([f"{d.year}q{(d.month - 1) // 3 + 1}" for d in dates])
        quarters = np.unique(yq)

        npart = self.default_params["partitions"]

        lsd_partitions = {}

        # For each quarter divide the LSDs it contains into a number of partitions to
        # give jack knifes
        for quarter in quarters:

            lsds_in_quarter = sorted(np.array(lsds)[yq == quarter])

            # Skip quarters with too few days in them
            if len(lsds_in_quarter) < self.default_params["min_days"] * npart:
                continue

            for i in range(npart):
                lsd_partitions[f"{quarter}p{i}"] = [
                    int(d) for d in lsds_in_quarter[i::npart]
                ]

        # Save the relevant parameters into the revisions configuration
        self.default_params["days"] = {
            int(day): str(path) for day, path in days.items()
        }
        self.default_params["stacks"] = lsd_partitions
Example 21
def format_transport(serial_num):
    """Interactive routine for formatting a transport disc as a storage
    node; formats and labels the disc as necessary, the adds to the
    database. The disk is specified using the manufacturers
    SERIAL_NUM, which is printed on the disk.
    """

    if os.getuid() != 0:
        print("You must be root to run mount on a transport disc. I quit.")
        return

    # Make sure we connect RW
    db.connect(read_write=True)

    # Find the disc.
    dev = glob.glob("/dev/disk/by-id/*%s" % serial_num)
    if len(dev) == 0:
        print("No disc with that serial number is attached.")
        return
    elif len(dev) > 1:
        print("Confused: found more than one device matching that serial number:")
        for d in dev:
            print("  %s" % dev)
        print("Aborting.")
        return
    dev = dev[0]
    dev_part = "%s-part1" % dev

    # Figure out if it is formatted.
    print("Checking to see if disc is formatted. Please wait.")
    fp = os.popen("parted -s %s print" % dev)
    formatted = False
    part_start = False
    while True:
        l = fp.readline()
        if not l:
            break
        if l.find("Number") == 0 and l.find("Start") > 0 and l.find("File system") > 0:
            part_start = True
        elif l.strip() != "" and part_start:
            formatted = True
    fp.close()

    if not formatted:
        if not click.confirm("Disc is not formatted. Should I format it?"):
            return
        print("Creating partition. Please wait.")
        os.system(
            "parted -s -a optimal %s mklabel gpt -- mkpart primary 0%% 100%%" % dev
        )
        print("Formatting disc. Please wait.")
        time.sleep(5)  # Sleep for a few seconds to allow the partition to appear
        os.system("mkfs.ext4 %s -m 0 -L CH-%s" % (dev_part, serial_num))
    else:
        print("Disc is already formatted.")

    e2label = get_e2label(dev_part)
    name = "CH-%s" % serial_num
    if e2label and e2label != name:
        print(
            "Disc label %s does not conform to labelling standard, "
            "which is CH-<serialnum>." % e2label
        )
        exit()
    elif not e2label:
        print('Labelling the disc as "%s" (using e2label) ...' % (name))
        assert dev_part is not None
        assert len(name) <= MAX_E2LABEL_LEN
        stat = os.system("/sbin/e2label %s %s" % (dev_part, name))
        if stat:
            print("Failed to e2label! Stat = %s. I quit." % (stat))
            exit()

    # Ensure the mount path exists.
    root = "/mnt/%s" % name
    if not os.path.isdir(root):
        print("Creating mount point %s." % root)
        os.mkdir(root)

    # Check to see if the disc is active.
    fp = os.popen("df")
    active = False
    dev_part_abs = os.path.realpath(dev_part)
    while 1:
        l = fp.readline()
        if not l:
            break
        if l.find(root) > 0:
            if l[: len(dev_part)] == dev_part or l[: len(dev_part_abs)] == dev_part_abs:
                active = True
            else:
                print(
                    "%s is a mount point, but %s is already active there."
                    % (root, l.split()[0])
                )
    fp.close()

    try:
        node = di.StorageNode.get(name=name)
    except pw.DoesNotExist:
        print(
            "This disc has not been registered yet as a storage node. "
            "Registering now."
        )
        try:
            group = di.StorageGroup.get(name="transport")
        except pw.DoesNotExist:
            print('Hmmm. Storage group "transport" does not exist. I quit.')
            exit()

        # We need to write to the database.
        db.connect(read_write=True)
        node = di.StorageNode.create(
            name=name, root=root, group=group, storage_type="T", min_avail_gb=1
        )

        print("Successfully created storage node.")

    print("Node created but not active. Run alpenhorn mount_transport for that.")
Example 22
    def test_connect_rw(self):
        db.connect(read_write=True)
        TestTable.update(datum=datum_value * 2).execute()
        self.assertEqual(TestTable.select(TestTable.datum).scalar(), datum_value * 2)
Example 23
    def test_connect(self):
        db.connect()
        self.assertEqual(TestTable.select(TestTable.datum).scalar(), datum_value)
Example 24
def import_files(node_name, verbose, acq, dry):
    """Scan the current directory for known acquisition files and add them into the database for NODE.

    This command is useful for manually maintaining an archive where we can run
    alpenhornd in the usual manner.
    """
    db.connect(read_write=True)

    # Construct list of acqs to scan
    if acq is None:
        acqs = glob.glob("*")
    else:
        acqs = acq

    # Keep track of state as we process the files
    added_files = []  # Files we have added to the database
    corrupt_files = []  # Known files which are corrupt
    registered_files = []  # Files already registered in the database
    unknown_files = []  # Files not known in the database
    not_acqs = []  # Directories which were not known acquisitions

    # Fetch a reference to the node
    try:
        node = di.StorageNode.select().where(di.StorageNode.name == node_name).get()
    except pw.DoesNotExist:
        print("Unknown node.")
        return

    with click.progressbar(acqs, label="Scanning acquisitions") as acq_iter:

        for acq_name in acq_iter:

            try:
                di.util.parse_acq_name(acq_name)
            except db.ValidationError:
                not_acqs.append(acq_name)
                continue

            try:
                acq = di.ArchiveAcq.select().where(di.ArchiveAcq.name == acq_name).get()
            except pw.DoesNotExist:
                not_acqs.append(acq_name)
                continue

            files = glob.glob(acq_name + "/*")

            # Fetch lists of all files in this acquisition, and all
            # files in this acq with local copies
            file_names = [f.name for f in acq.files]
            local_file_names = [
                f.name
                for f in acq.files.join(di.ArchiveFileCopy).where(
                    di.ArchiveFileCopy.node == node
                )
            ]

            for fn in files:
                f_name = os.path.split(fn)[1]

                # Check if file exists in database
                if f_name not in file_names:
                    unknown_files.append(fn)
                    continue

                # Check if file is already registered on this node
                if f_name in local_file_names:
                    registered_files.append(fn)
                else:
                    archive_file = (
                        di.ArchiveFile.select()
                        .where(di.ArchiveFile.name == f_name, di.ArchiveFile.acq == acq)
                        .get()
                    )

                    if os.path.getsize(fn) != archive_file.size_b:
                        corrupt_files.append(fn)
                        continue

                    added_files.append(fn)
                    if not dry:
                        di.ArchiveFileCopy.create(
                            file=archive_file, node=node, has_file="Y", wants_file="Y"
                        )

    print("\n==== Summary ====")
    print()
    print("Added %i files" % len(added_files))
    print()
    print("%i corrupt files." % len(corrupt_files))
    print("%i files already registered." % len(registered_files))
    print("%i files not known" % len(unknown_files))
    print("%i directories were not acquisitions." % len(not_acqs))

    if verbose > 0:
        print()
        print("Added files:")
        print()

        for fn in added_files:
            print(fn)

    if verbose > 1:

        print("Corrupt:")
        for fn in corrupt_files:
            print(fn)
        print()

        print("Unknown files:")
        for fn in unknown_files:
            print(fn)
        print()

        print("Unknown acquisitions:")
        for fn in not_acqs:
            print(fn)
        print()
Example 25
def update_node_requests(node):
    """Process file copy requests onto this node."""

    global done_transport_this_cycle

    # Ensure we are not on an HPSS node
    if is_hpss_node(node):
        log.error("Cannot process HPSS node here.")
        return

    # Skip if node is too full
    if node.avail_gb < (node.min_avail_gb + 10):
        log.info("Node %s is nearly full. Skip transfers." % node.name)
        return

    # Calculate the total archive size from the database
    size_query = (di.ArchiveFile.select(fn.Sum(di.ArchiveFile.size_b)).join(
        di.ArchiveFileCopy).where(di.ArchiveFileCopy.node == node,
                                  di.ArchiveFileCopy.has_file == "Y"))
    size = size_query.scalar(as_tuple=True)[0]
    current_size_gb = float(0.0 if size is None else size) / 2**30.0

    # Stop if the current archive size is bigger than the maximum (if set, i.e. > 0)
    if current_size_gb > node.max_total_gb and node.max_total_gb > 0.0:
        log.info(
            "Node %s has reached maximum size (current: %.1f GB, limit: %.1f GB)"
            % (node.name, current_size_gb, node.max_total_gb))
        return

    # ... OR if this is a transport node quit if the transport cycle is done.
    if node.storage_type == "T" and done_transport_this_cycle:
        log.info("Ignoring transport node %s" % node.name)
        return

    start_time = time.time()

    # Fetch requests to process from the database
    requests = di.ArchiveFileCopyRequest.select().where(
        ~di.ArchiveFileCopyRequest.completed,
        ~di.ArchiveFileCopyRequest.cancelled,
        di.ArchiveFileCopyRequest.group_to == node.group,
    )

    # Add in constraint that node_from cannot be an HPSS node
    requests = requests.join(
        di.StorageNode).where(di.StorageNode.address != "HPSS")

    for req in requests:
        if time.time() - start_time > max_time_per_node_operation:
            break  # Don't hog all the time.

        # By default, if a copy fails, we mark the source file as suspect
        # so it gets re-MD5'd on the source node.
        check_source_on_err = True

        # Only continue if the node is actually active
        if not req.node_from.active:
            continue

        # For transport disks we should only copy onto the transport
        # node if the from_node is local, this should prevent pointlessly
        # rsyncing across the network
        if node.storage_type == "T" and node.host != req.node_from.host:
            log.debug(
                "Skipping request for %s/%s from remote node [%s] onto local "
                "transport disks" %
                (req.file.acq.name, req.file.name, req.node_from.name))
            continue

        # Only proceed if the destination file does not already exist.
        try:
            di.ArchiveFileCopy.get(
                di.ArchiveFileCopy.file == req.file,
                di.ArchiveFileCopy.node == node,
                di.ArchiveFileCopy.has_file == "Y",
            )
            log.info("Skipping request for %s/%s since it already exists on "
                     'this node ("%s"), and updating DB to reflect this.' %
                     (req.file.acq.name, req.file.name, node.name))
            di.ArchiveFileCopyRequest.update(completed=True).where(
                di.ArchiveFileCopyRequest.file == req.file).where(
                    di.ArchiveFileCopyRequest.group_to ==
                    node.group).execute()
            continue
        except pw.DoesNotExist:
            pass

        # Only proceed if the source file actually exists (and is not corrupted).
        try:
            di.ArchiveFileCopy.get(
                di.ArchiveFileCopy.file == req.file,
                di.ArchiveFileCopy.node == req.node_from,
                di.ArchiveFileCopy.has_file == "Y",
            )
        except pw.DoesNotExist:
            log.error(
                "Skipping request for %s/%s since it is not available on "
                'node "%s". [file_id=%i]' % (req.file.acq.name, req.file.name,
                                             req.node_from.name, req.file.id))
            continue

        # Check that there is enough space available.
        if node.avail_gb * 2**30.0 < 2.0 * req.file.size_b:
            log.warning('Node "%s" is full: not adding datafile "%s/%s".' %
                        (node.name, req.file.acq.name, req.file.name))
            continue

        # Construct the origin and destination paths.
        from_path = "%s/%s/%s" % (req.node_from.root, req.file.acq.name,
                                  req.file.name)
        if req.node_from.host != node.host:
            from_path = "%s@%s:%s" % (
                req.node_from.username,
                req.node_from.address,
                from_path,
            )

        to_path = "%s/%s/" % (node.root, req.file.acq.name)
        if not os.path.isdir(to_path):
            log.info('Creating directory "%s".' % to_path)
            os.mkdir(to_path)

        # Giddy up!
        log.info('Transferring file "%s/%s".' %
                 (req.file.acq.name, req.file.name))
        st = time.time()

        # For the potential error message later
        stderr = None

        # Attempt to transfer the file. Each of the methods below needs to set a
        # return code `ret` and give an `md5sum` of the transferred file.

        # First we need to check if we are copying over the network
        if req.node_from.host != node.host:

            # First try bbcp which is a fast multistream transfer tool. bbcp can
            # calculate the md5 hash as it goes, so we'll do that to save doing
            # it at the end.
            if command_available("bbcp"):
                ret, stdout, stderr = run_command([
                    "bbcp",
                    "-V",
                    "-f",
                    "-z",
                    "--port",
                    "4200",
                    "-W",
                    "4M",
                    "-s",
                    "16",
                    "-e",
                    "-E",
                    "%md5=",
                    from_path,
                    to_path,
                ])

                # Attempt to parse STDERR for the md5 hash
                if ret == 0:
                    mo = re.search("md5 ([a-f0-9]{32})", stderr)
                    if mo is None:
                        log.error(
                            "BBCP transfer has gone awry. STDOUT: %s\n STDERR: %s"
                            % (stdout, stderr))
                        ret = -1
                        md5sum = None
                    else:
                        md5sum = mo.group(1)
                else:
                    md5sum = None

            # Next try rsync over ssh.
            elif command_available("rsync"):
                ret, stdout, stderr = run_command(
                    ["rsync", "--compress"] + RSYNC_OPTS + [
                        "--rsync-path=ionice -c2 -n4 rsync",
                        "--rsh=ssh -q",
                        from_path,
                        to_path,
                    ])

                # rsync v3+ already does a whole-file MD5 sum while
                # transferring and guarantees the written file has the same
                # MD5 sum as the source file, so we can skip the check here.
                md5sum = req.file.md5sum if ret == 0 else None

                # If the rsync error occurred during `mkstemp` this is a
                # problem on the destination, not the source
                if ret and "mkstemp" in stderr:
                    log.warn('rsync file creation failed on "{0}"'.format(
                        node.name))
                    check_source_on_err = False
                elif "write failed on" in stderr:
                    log.warn('rsync failed to write to "{0}": {1}'.format(
                        node.name, stderr[stderr.rfind(":") + 2:].strip()))
                    check_source_on_err = False

            # If we get here then we have no idea how to transfer the file...
            else:
                log.warn("No commands available to complete this transfer.")
                check_source_on_err = False
                ret = -1

        # Okay, great we're just doing a local transfer.
        else:

            # First try to just hard link the file. This will only work if we
            # are on the same filesystem. As there's no actual copying it's
            # probably unnecessary to calculate the md5 checksum, so we'll just
            # fake it.
            try:
                link_path = "%s/%s/%s" % (node.root, req.file.acq.name,
                                          req.file.name)

                # Check explicitly if link already exists as this and
                # being unable to link will both raise OSError and get
                # confused.
                if os.path.exists(link_path):
                    log.error("File %s already exists. Clean up manually." %
                              link_path)
                    check_source_on_err = False
                    ret = -1
                else:
                    os.link(from_path, link_path)
                    ret = 0
                    md5sum = (
                        req.file.md5sum
                    )  # As we're linking the md5sum can't change. Skip the check here...

            # If we couldn't just link the file, try copying it with rsync.
            except OSError:
                if command_available("rsync"):
                    ret, stdout, stderr = run_command(["rsync"] + RSYNC_OPTS +
                                                      [from_path, to_path])

                    # rsync v3+ already does a whole-file MD5 sum while
                    # transferring and guarantees the written file has the same
                    # MD5 sum as the source file, so we can skip the check here.
                    md5sum = req.file.md5sum if ret == 0 else None

                    # If the rsync error occurred during `mkstemp` this is a
                    # problem on the destination, not the source
                    if ret and "mkstemp" in stderr:
                        log.warn('rsync file creation failed on "{0}"'.format(
                            node.name))
                        check_source_on_err = False
                    elif "write failed on" in stderr:
                        log.warn('rsync failed to write to "{0}": {1}'.format(
                            node.name, stderr[stderr.rfind(":") + 2:].strip()))
                        check_source_on_err = False
                else:
                    log.warn(
                        "No commands available to complete this transfer.")
                    check_source_on_err = False
                    ret = -1

        # Check the return code...
        if ret:
            if check_source_on_err:
                # If the copy didn't work, then the remote file may be corrupted.
                log.error(
                    "Copy failed: {0}. Marking source file suspect.".format(
                        stderr if stderr is not None else "Unspecified error.")
                )
                di.ArchiveFileCopy.update(has_file="M").where(
                    di.ArchiveFileCopy.file == req.file,
                    di.ArchiveFileCopy.node == req.node_from,
                ).execute()
            else:
                # An error occurred that can't be due to the source
                # being corrupt
                log.error("Copy failed.")
            continue
        et = time.time()

        # Check integrity.
        if md5sum == req.file.md5sum:
            size_mb = req.file.size_b / 2**20.0
            trans_time = et - st
            rate = size_mb / trans_time
            log.info(
                "Pull complete (md5sum correct). Transferred %.1f MB in %i "
                "seconds [%.1f MB/s]" % (size_mb, int(trans_time), rate))

            # Update the FileCopy (if exists), or insert a new FileCopy
            # Use transaction to avoid race condition
            with db.proxy.transaction():
                try:
                    done = False
                    while not done:
                        try:
                            fcopy = (di.ArchiveFileCopy.select().where(
                                di.ArchiveFileCopy.file == req.file,
                                di.ArchiveFileCopy.node == node,
                            ).get())
                            fcopy.has_file = "Y"
                            fcopy.wants_file = "Y"
                            fcopy.save()
                            done = True
                        except pw.OperationalError:
                            log.error(
                                "MySQL connexion dropped. Will attempt to reconnect in "
                                "five seconds.")
                            time.sleep(5)
                            db.connect(True)
                except pw.DoesNotExist:
                    di.ArchiveFileCopy.insert(file=req.file,
                                              node=node,
                                              has_file="Y",
                                              wants_file="Y").execute()

            # Mark any FileCopyRequest for this file as completed
            di.ArchiveFileCopyRequest.update(completed=True).where(
                di.ArchiveFileCopyRequest.file == req.file).where(
                    di.ArchiveFileCopyRequest.group_to ==
                    node.group).execute()

            if node.storage_type == "T":
                # This node is a transport disk, so record that we've done a
                # transport transfer this cycle.
                done_transport_this_cycle = True

            # Update node available space
            update_node_free_space(node)

        else:
            log.error('Error with md5sum check: %s on node "%s", but %s on '
                      'this node, "%s".' %
                      (req.file.md5sum, req.node_from.name, md5sum, node.name))
            log.error('Removing file "%s/%s".' % (to_path, req.file.name))
            try:
                os.remove("%s/%s" % (to_path, req.file.name))
            except OSError:
                log.error("Could not remove file.")

            # Since the md5sum failed, the remote file may be corrupted.
            log.error("Marking source file suspect.")
            di.ArchiveFileCopy.update(has_file="M").where(
                di.ArchiveFileCopy.file == req.file,
                di.ArchiveFileCopy.node == req.node_from,
            ).execute()
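The local branch of the transfer above relies on the fact that a hard link never rewrites the file data, so the stored MD5 can be reused, while a genuine copy has to be re-verified. A minimal standalone sketch of that link-or-copy strategy (the helper names copy_local and _md5 are illustrative and not taken from the code above):

import hashlib
import os
import shutil


def _md5(path, blocksize=2**20):
    """MD5 of a file, read in blocks to keep memory use bounded."""
    m = hashlib.md5()
    with open(path, "rb") as fh:
        for block in iter(lambda: fh.read(blocksize), b""):
            m.update(block)
    return m.hexdigest()


def copy_local(from_path, to_path, known_md5=None):
    """Hard-link from_path to to_path when possible, otherwise copy.

    Returns the MD5 hex digest of the destination, or None if the
    destination already exists.
    """
    if os.path.exists(to_path):
        # Refuse to clobber an existing file; clean it up manually instead.
        return None

    try:
        # A hard link cannot change the data, so a checksum we already
        # trust for the source is still valid for the destination.
        os.link(from_path, to_path)
        return known_md5 if known_md5 is not None else _md5(from_path)
    except OSError:
        # Different filesystem (EXDEV) or linking not permitted: do a real
        # copy and hash what actually landed on disk.
        shutil.copyfile(from_path, to_path)
        return _md5(to_path)

Compared with the loop above this drops the rsync fallback; it is only meant to show why the linked case can safely skip the checksum while the copied case cannot.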
Esempio n. 26
0
def test_connect_ro(self):
    db.connect()
    with self.assertRaises(pw.OperationalError):
        TestTable.update(datum=datum_value * 2).execute()
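pw.OperationalError is also what the pull loop further above catches when the MySQL connection drops mid-update; there the handler sleeps, reconnects read/write and tries the save again. A hedged sketch of that retry pattern as a standalone helper (the name save_with_retry and the retries/delay parameters are illustrative, not part of the code above):

import time

import peewee as pw

import chimedb.core as db


def save_with_retry(instance, retries=5, delay=5.0):
    """Save a peewee model instance, reconnecting and retrying if the
    database connection has been dropped (pw.OperationalError)."""
    for _ in range(retries):
        try:
            return instance.save()
        except pw.OperationalError:
            # Connection dropped: wait, reconnect read/write, try again.
            time.sleep(delay)
            db.connect(read_write=True)
    # Final attempt outside the handler so a persistent error propagates.
    return instance.save()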
Esempio n. 27
0
def clean(node_name, days, size, force, now, target, acq):
    """Clean up NODE by marking files as potentially removable.

    If --target is specified we will only remove files already available in the
    TARGET_GROUP. This is useful for cleaning out intermediate locations such as
    transport disks.

    Using the --days flag will only clean correlator and housekeeping
    files which have a timestamp associated with them. It will not
    touch other types. If no --days flag is given, all files will be
    considered for removal.

    The size specified with --size is always rounded up to a whole number of
    files.  Files in this mode are ordered by registration time (i.e. database
    order), not by acquisition time, and files of every type are considered,
    not just those with a timestamp.  If more than <size> GiB of files are
    already marked for removal, no new files will be marked.

    The --size and --days flags are mutually exclusive.
    """

    db.connect(read_write=True)

    # Check for clashing arguments
    if days is not None and size is not None:
        raise ValueError("Parameter error: you cannot specify both --days and --size")

    # Ignore weird values
    if size is not None and size <= 0:
        print("Nothing selected for cleaning.")
        return

    try:
        this_node = di.StorageNode.get(di.StorageNode.name == node_name)
    except pw.DoesNotExist:
        print("Specified node does not exist.")
        return

    # Check to see if we are on an archive node
    if this_node.storage_type == "A":
        if force or click.confirm("DANGER: run clean on archive node?"):
            print("%s is an archive node. Forcing clean." % node_name)
        else:
            print("Cannot clean archive node %s without forcing." % node_name)
            return

    # Select FileCopys on this node.

    files = (
        di.ArchiveFileCopy.select(
            di.ArchiveFileCopy.id, di.ArchiveFileCopy.wants_file, di.ArchiveFile.size_b
        )
        .join(di.ArchiveFile)
        .where(di.ArchiveFileCopy.node == this_node)
        .order_by(di.ArchiveFile.id)
    )

    # If size is specified, we select files that are currently on the node,
    # and ignore wants_file.  Otherwise, we select all files destined for
    # this node (wants_file == 'Y'), whether or not they're already on it
    if size is None:
        files = files.where(di.ArchiveFileCopy.wants_file == "Y")
    else:
        files = files.where(di.ArchiveFileCopy.has_file == "Y")

    # Limit to acquisition
    if acq is not None:
        try:
            acq = di.ArchiveAcq.get(name=acq)
        except pw.DoesNotExist:
            raise db.NotFoundError("Specified acquisition %s does not exist" % acq)

        files_in_acq = di.ArchiveFile.select().where(di.ArchiveFile.acq == acq)

        files = files.where(di.ArchiveFileCopy.file << files_in_acq)

    # If the target option has been specified, only clean files also available there...
    if target is not None:

        # Fetch a reference to the target group
        try:
            target_group = di.StorageGroup.get(name=target)
        except pw.DoesNotExist:
            raise db.NotFoundError(
                'Target group "%s" does not exist in the DB.' % target
            )

        # First get the nodes at the destination...
        nodes_at_target = di.StorageNode.select().where(
            di.StorageNode.group == target_group
        )

        # Then use this to get a list of all files at the destination...
        files_at_target = (
            di.ArchiveFile.select()
            .join(di.ArchiveFileCopy)
            .where(
                di.ArchiveFileCopy.node << nodes_at_target,
                di.ArchiveFileCopy.has_file == "Y",
            )
        )

        # Only match files that are also available at the target
        files = files.where(di.ArchiveFileCopy.file << files_at_target)

    # If --days has been set we need to restrict to files older than the given
    # time. This only works for a few particular file types
    if days is not None and days > 0:
        # Get the time for the oldest files to keep
        oldest_unix = time.time() - 24 * 3600 * days

        # List of filetypes we want to update, needs a human readable name and a
        # FileInfo table.
        filetypes = [["correlation", di.CorrFileInfo], ["housekeeping", di.HKFileInfo]]

        file_ids = []

        # Iterate over file types for cleaning
        for name, infotable in filetypes:

            # Filter to fetch only ones with a start time older than `oldest`
            oldfiles = files.join(infotable).where(infotable.start_time < oldest_unix)

            local_file_ids = list(oldfiles)

            # Get the number of files of this type
            count = oldfiles.count()

            if count > 0:
                size_bytes = (
                    di.ArchiveFileCopy.select(pw.fn.Sum(di.ArchiveFile.size_b))
                    .join(di.ArchiveFile)
                    .where(di.ArchiveFileCopy.id << local_file_ids)
                    .scalar()
                )

                size_gb = int(size_bytes) / 2 ** 30.0

                print(
                    "Cleaning up %i %s files (%.1f GB) from %s "
                    % (count, name, size_gb, node_name)
                )

                file_ids += local_file_ids

    # If size is set, iterate through files until we've satisfied the size given
    elif size is not None:

        # Convert to bytes
        size *= 2 ** 30

        # Iterate though the file list until we've found enough files
        marked_size = 0
        count = 0
        file_ids = list()
        for copy in files:
            # Add the file to the list to be marked only if necessary.
            # We can escalate wants_file = 'M' to 'N' here
            if copy.wants_file == "Y" or (now and copy.wants_file == "M"):
                file_ids.append(copy)
                marked_size += copy.file.size_b
                count += 1

            # Check if we're done.  The size subtraction happens even if
            # the file wasn't added to the list
            size -= copy.file.size_b
            if size <= 0:
                break

        if count > 0:
            print(
                "Cleaning up %i files (%.1f GB) from %s "
                % (count, marked_size / 2 ** 30, node_name)
            )
        else:
            print(
                "Size parameter already satisfied.  No new files marked for cleaning."
            )
            return

    # If neither days nor size is set, then just select all files that
    # meet the requirements so far
    else:

        file_ids = list(files)
        count = files.count()

        if count > 0:
            size_bytes = (
                di.ArchiveFileCopy.select(pw.fn.Sum(di.ArchiveFile.size_b))
                .join(di.ArchiveFile)
                .where(di.ArchiveFileCopy.id << file_ids)
                .scalar()
            )

            size_gb = int(size_bytes) / 1073741824.0

            print(
                "Cleaning up %i files (%.1f GB) from %s " % (count, size_gb, node_name)
            )

    # If there are any files to clean, ask for confirmation and the mark them in
    # the database for removal
    if len(file_ids) > 0:
        if force or click.confirm("  Are you sure?"):
            print("  Marking files for cleaning.")

            state = "N" if now else "M"

            update = di.ArchiveFileCopy.update(wants_file=state).where(
                di.ArchiveFileCopy.id << file_ids
            )

            n = update.execute()

            print("Marked %i files for cleaning" % n)

        else:
            print("  Cancelled")
    else:
        print("No files selected for cleaning on %s." % node_name)
Esempio n. 28
0
import peewee as pw
import click

import chimedb.core as db
import chimedb.data_index.orm as di

from . import logger  # Import logger here to avoid connection messages for transfer

# Get a reference to the log
log = logger.get_log()

# Connect to the database read/write
db.connect(read_write=True)


@click.group()
def cli():
    """Call back commands for updating the database from a shell script after an
    HPSS transfer."""
    pass


@cli.command()
@click.argument("file_id", type=int)
@click.argument("node_id", type=int)
def push_failed(file_id, node_id):
    """Update the database to reflect that the HPSS transfer failed.
Esempio n. 29
0
def sync(
    node_name, group_name, acq, force, nice, target, transport, show_acq, show_files
):
    """Copy all files from NODE to GROUP that are not already present.

    We can also use the --target option to transfer only files that are not
    already available in either the destination group or the TARGET_GROUP.
    This is useful for transferring data to a staging location before it goes
    to a final archive (e.g. HPSS, transport disks).
    """

    # Make sure we connect RW
    db.connect(read_write=True)

    try:
        from_node = di.StorageNode.get(name=node_name)
    except pw.DoesNotExist:
        raise db.NotFoundError('Node "%s" does not exist in the DB.' % node_name)
    try:
        to_group = di.StorageGroup.get(name=group_name)
    except pw.DoesNotExist:
        raise db.NotFoundError('Group "%s" does not exist in the DB.' % group_name)

    # Construct list of file copies that are available on the source node, and
    # not available on any nodes at the destination. This query is quite complex
    # so I've broken it up...

    # First get the nodes at the destination...
    nodes_at_dest = di.StorageNode.select().where(di.StorageNode.group == to_group)

    # Then use this to get a list of all files at the destination...
    files_at_dest = (
        di.ArchiveFile.select()
        .join(di.ArchiveFileCopy)
        .where(
            di.ArchiveFileCopy.node << nodes_at_dest, di.ArchiveFileCopy.has_file == "Y"
        )
    )

    # Then combine to get all file(copies) that are available at the source but
    # not at the destination...
    copy = di.ArchiveFileCopy.select().where(
        di.ArchiveFileCopy.node == from_node,
        di.ArchiveFileCopy.has_file == "Y",
        ~(di.ArchiveFileCopy.file << files_at_dest),
    )

    # If the target option has been specified, only copy files that are also
    # not available there...
    if target is not None:

        # Fetch a reference to the target group
        try:
            target_group = di.StorageGroup.get(name=target)
        except pw.DoesNotExist:
            raise db.NotFoundError(
                'Target group "%s" does not exist in the DB.' % target
            )

        # First get the nodes at the destination...
        nodes_at_target = di.StorageNode.select().where(
            di.StorageNode.group == target_group
        )

        # Then use this to get a list of all files at the destination...
        files_at_target = (
            di.ArchiveFile.select()
            .join(di.ArchiveFileCopy)
            .where(
                di.ArchiveFileCopy.node << nodes_at_target,
                di.ArchiveFileCopy.has_file == "Y",
            )
        )

        # Only match files that are also not available at the target
        copy = copy.where(~(di.ArchiveFileCopy.file << files_at_target))

    # In transport mode (DEPRECATED) we only move files that don't have an
    # archive copy elsewhere...
    if transport:
        import warnings

        warnings.warn("Transport mode is deprecated. Try to use --target instead.")

        # Get list of other archive nodes
        other_archive_nodes = di.StorageNode.select().where(
            di.StorageNode.storage_type == "A", di.StorageNode.id != from_node
        )

        files_in_archive = (
            di.ArchiveFile.select()
            .join(di.ArchiveFileCopy)
            .where(
                di.ArchiveFileCopy.node << other_archive_nodes,
                di.ArchiveFileCopy.has_file == "Y",
            )
        )

        copy = copy.where(~(di.ArchiveFileCopy.file << files_in_archive))

    # Join onto ArchiveFile for later query parts
    copy = copy.join(di.ArchiveFile)

    # If requested, limit query to a specific acquisition...
    if acq is not None:

        # Fetch acq if specified
        try:
            acq = di.ArchiveAcq.get(name=acq)
        except pw.DoesNotExist:
            raise db.NotFoundError('Acquisition "%s" does not exist in the DB.' % acq)

        # Restrict files to be in the acquisition
        copy = copy.where(di.ArchiveFile.acq == acq)

    if not copy.count():
        print("No files to copy from node %s." % (node_name))
        return

    # Show an acquisition-based summary of the files to be copied
    if show_acq:
        acqs = [c.file.acq.name for c in copy]

        import collections

        for acq, count in collections.Counter(acqs).items():
            print("%s [%i files]" % (acq, count))

    # Show all files to be copied
    if show_files:
        for c in copy:
            print("%s/%s" % (c.file.acq.name, c.file.name))

    size_bytes = copy.select(pw.fn.Sum(di.ArchiveFile.size_b)).scalar()
    size_gb = int(size_bytes) / 1073741824.0

    print(
        "Will request that %d files (%.1f GB) be copied from node %s to group %s."
        % (copy.count(), size_gb, node_name, group_name)
    )

    if not (force or click.confirm("Do you want to proceed?")):
        print("Aborted.")
        return

    dtnow = datetime.datetime.now()

    # Perform update in a transaction to avoid any clobbering from concurrent updates
    with di.ArchiveFileCopyRequest._meta.database.atomic():

        # Get a list of all the file ids for the copies we should perform
        files_ids = [c.file_id for c in copy]

        # Get a list of all the file ids for existing requests
        requests = di.ArchiveFileCopyRequest.select().where(
            di.ArchiveFileCopyRequest.group_to == to_group,
            di.ArchiveFileCopyRequest.node_from == from_node,
        )
        req_file_ids = [req.file_id for req in requests]

        # Separate the files into ones that already have requests and ones that don't
        files_in = [x for x in files_ids if x in req_file_ids]
        files_out = [x for x in files_ids if x not in req_file_ids]

        sys.stdout.write(
            "Updating %i existing requests and inserting %i new ones.\n"
            % (len(files_in), len(files_out))
        )

        # Perform an update of all the existing copy requests
        if len(files_in) > 0:
            update = di.ArchiveFileCopyRequest.update(
                nice=nice,
                completed=False,
                cancelled=False,
                timestamp=dtnow,
                n_requests=di.ArchiveFileCopyRequest.n_requests + 1,
            )

            update = update.where(
                di.ArchiveFileCopyRequest.file << files_in,
                di.ArchiveFileCopyRequest.group_to == to_group,
                di.ArchiveFileCopyRequest.node_from == from_node,
            )
            update.execute()

        # Insert any new requests
        if len(files_out) > 0:

            # Construct a list of all the rows to insert
            insert = [
                {
                    "file": fid,
                    "node_from": from_node,
                    "nice": 0,
                    "group_to": to_group,
                    "completed": False,
                    "n_requests": 1,
                    "timestamp": dtnow,
                }
                for fid in files_out
            ]

            # Do a bulk insert of these new rows
            di.ArchiveFileCopyRequest.insert_many(insert).execute()
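One note on the final bookkeeping step: the membership tests "x in req_file_ids" scan a Python list, which becomes quadratic when many requests already exist. A small hedged sketch of the same split done with sets (an illustration of the idea, not code from the project):

def split_requests(candidate_ids, existing_ids):
    """Split candidate file ids into those that already have a copy request
    (to be updated) and those that need a fresh insert.  Sets make each
    membership test O(1) instead of a list scan."""
    candidates = set(candidate_ids)
    existing = set(existing_ids)
    return sorted(candidates & existing), sorted(candidates - existing)


# e.g., replacing the two list comprehensions above:
#   files_in, files_out = split_requests(
#       (c.file_id for c in copy), (req.file_id for req in requests)
#   )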