Example #1
    def test_singlebag(self):
        # test creation of a small single bag

        shutil.copytree(self.sipdata, os.path.join(self.revdir, "1491"))
        self.sip = sip.MIDASSIPHandler(self.midasid, self.config)

        self.assertEqual(self.sip.state, status.FORGOTTEN)

        self.sip.bagit()

        self.assertTrue(
            os.path.exists(
                os.path.join(self.store,
                             self.midasid + ".1_0_0.mbag0_4-0.zip")))
        self.assertTrue(not os.path.exists(
            os.path.join(self.store, self.midasid + ".1_0_0.mbag0_4-1.zip")))

        csumfile = os.path.join(self.store,
                                self.midasid + ".1_0_0.mbag0_4-0.zip.sha256")
        self.assertTrue(os.path.exists(csumfile))
        with open(csumfile) as fd:
            csum = fd.read().strip()

        self.assertEqual(self.sip.state, status.SUCCESSFUL)
        self.assertIn('bagfiles', self.sip.status)
        self.assertEqual(len(self.sip.status['bagfiles']), 1)
        self.assertEqual(self.sip.status['bagfiles'][0]['name'],
                         self.midasid + ".1_0_0.mbag0_4-0.zip")
        self.assertEqual(self.sip.status['bagfiles'][0]['sha256'], csum)

        # check for checksum files in review dir
        cf = os.path.join(self.revdir, "1491/_preserv",
                          self.midasid + "_0.sha256")
        self.assertTrue(os.path.exists(cf), "Does not exist: " + cf)
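
The checksum assertions above read the ".sha256" sidecar written next to the serialized bag and compare it with the value recorded in the handler's status. A minimal standalone sketch of that verification, using only the standard library (the function name verify_bag_checksum is illustrative, not part of the handler's API):

import hashlib

def verify_bag_checksum(zipfile):
    # Recompute the SHA-256 digest of the serialized bag in 64 KB blocks.
    h = hashlib.sha256()
    with open(zipfile, 'rb') as fd:
        for block in iter(lambda: fd.read(65536), b''):
            h.update(block)

    # The sidecar file holds the hex digest recorded at preservation time.
    with open(zipfile + ".sha256") as fd:
        recorded = fd.read().strip()

    return h.hexdigest() == recorded
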
Example #2
    def test_ctor_asupdate(self):
        self.sip = sip.MIDASSIPHandler(self.midasid,
                                       self.config,
                                       asupdate=True)
        self.assertTrue(self.sip.bagger)
        self.assertEqual(self.sip.bagger.asupdate, True)

        self.assertTrue(isinstance(self.sip.status, dict))
        self.assertEqual(self.sip.state, status.FORGOTTEN)

        self.sip = sip.MIDASSIPHandler(self.midasid,
                                       self.config,
                                       asupdate=False)
        self.assertTrue(self.sip.bagger)
        self.assertEqual(self.sip.bagger.asupdate, False)

        self.assertTrue(isinstance(self.sip.status, dict))
        self.assertEqual(self.sip.state, status.FORGOTTEN)
Example #3
    def test_is_preserved(self):
        self.assertEqual(self.sip.state, status.FORGOTTEN)
        self.assertFalse(self.sip._is_preserved())
        self.sip.bagit()
        self.assertTrue(self.sip._is_preserved())

        # if there is no longer a cached status file, ensure that we notice
        # when there is a bag in the store dir
        os.remove(os.path.join(self.statusdir, self.midasid + '.json'))
        self.sip = sip.MIDASSIPHandler(self.midasid, self.config)
        stat = self.sip.status
        self.assertEqual(stat['state'], status.SUCCESSFUL)
        self.assertIn('orgotten', stat['message'])
Example #4
    def setUp(self):
        self.tf = Tempfiles()
        self.troot = self.tf.mkdir("siphandler")
        self.revdir = os.path.join(self.troot, "review")
        os.mkdir(self.revdir)
        self.workdir = os.path.join(self.troot, "working")
        # os.mkdir(self.workdir)
        self.stagedir = os.path.join(self.troot, "staging")
        # os.mkdir(self.stagedir)
        self.mdserv = os.path.join(self.troot, "mdserv")
        os.mkdir(self.mdserv)
        self.store = os.path.join(self.troot, "store")
        os.mkdir(self.store)
        self.statusdir = os.path.join(self.troot, "status")
        os.mkdir(self.statusdir)

        shutil.copytree(self.sipdata, os.path.join(self.revdir, "1491"))

        with open(os.path.join(datadir, "bagger_conf.yml")) as fd:
            baggercfg = yaml.safe_load(fd)

        self.config = {
            "working_dir": self.workdir,
            "store_dir": self.store,
            "staging_dir": self.stagedir,
            "review_dir": self.revdir,
            "mdbag_dir": self.mdserv,
            "status_manager": {
                "cachedir": self.statusdir
            },
            "logdir": self.workdir,
            "bagparent_dir": "_preserv",
            "bagger": baggercfg,
            "ingester": {
                "data_dir": os.path.join(self.workdir, "ingest"),
                "submit": "none"
            },
            "multibag": {
                "max_headbag_size": 2000000,
                #                "max_headbag_size": 100,
                "max_bag_size": 200000000
            }
        }

        self.sip = sip.MIDASSIPHandler(self.midasid, self.config)
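
The config dict assembled in setUp is the same shape the handler consumes at runtime. A minimal sketch of driving a preservation run with such a config outside the test harness, assuming the sip and status modules imported by these tests and an already-populated review directory (preserve_sip is an illustrative helper, not part of the package):

def preserve_sip(midasid, config):
    # A freshly constructed handler reports the FORGOTTEN state until work begins.
    handler = sip.MIDASSIPHandler(midasid, config)
    assert handler.state == status.FORGOTTEN

    # bagit() serializes the bag(s) into config['store_dir'] and records them
    # in the handler's status under 'bagfiles'.
    handler.bagit()

    if handler.state != status.SUCCESSFUL:
        raise RuntimeError("preservation did not complete; state: {0}".format(handler.state))
    return [f['name'] for f in handler.status.get('bagfiles', [])]
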
Example #5
    def test_metadata_revision(self):
        # test creating small update involving only metadata
        indir = os.path.join(self.revdir, "1491")
        os.mkdir(indir)
        shutil.copy(os.path.join(self.sipdata, "_pod.json"), indir)

        self.sip = sip.MIDASSIPHandler(self.midasid, self.config)

        srczip = os.path.join(distarchdir, "1491.1_0.mbag0_4-0.zip")
        destzip = os.path.join(distarchive,
                               self.midasid + ".1_0.mbag0_4-0.zip")
        cached = os.path.join(self.pubcache, os.path.basename(destzip))
        rmmrec = os.path.join(mdarchive, self.midasid + ".json")

        try:
            shutil.copyfile(srczip, destzip)
            shutil.copyfile(os.path.join(datadir, self.midasid + ".json"),
                            rmmrec)

            try:
                self.sip.bagit()
            except AIPValidationError as ex:
                self.fail(ex.description)

            self.assertTrue(
                os.path.exists(
                    os.path.join(self.store,
                                 self.midasid + ".1_0_1.mbag0_4-1.zip")))
            self.assertTrue(not os.path.exists(
                os.path.join(self.store, self.midasid +
                             ".1_0_1.mbag0_4-2.zip")))

            csumfile = os.path.join(
                self.store, self.midasid + ".1_0_1.mbag0_4-1.zip.sha256")
            self.assertTrue(os.path.exists(csumfile))
            with open(csumfile) as fd:
                csum = fd.read().strip()

            self.assertEqual(self.sip.state, status.SUCCESSFUL)
            self.assertIn('bagfiles', self.sip.status)
            self.assertEqual(len(self.sip.status['bagfiles']), 1)
            self.assertEqual(self.sip.status['bagfiles'][0]['name'],
                             self.midasid + ".1_0_1.mbag0_4-1.zip")
            self.assertEqual(self.sip.status['bagfiles'][0]['sha256'], csum)

            # check for checksum files in review dir
            cf = os.path.join(self.revdir, "1491/_preserv",
                              self.midasid + "_1.sha256")
            self.assertTrue(os.path.exists(cf), "Does not exist: " + cf)

            # check contents of revision
            bagdir = os.path.join(self.store,
                                  self.midasid + ".1_0_1.mbag0_4-1")
            unzip(bagdir + ".zip")
            bdatadir = os.path.join(bagdir, "data")
            self.assertTrue(os.path.isdir(bdatadir))
            self.assertEqual(len(os.listdir(bdatadir)), 0)
            bdatadir = os.path.join(bagdir, "metadata")
            self.assertTrue(os.path.isdir(bdatadir))
            self.assertTrue(os.path.isfile(os.path.join(bdatadir, "pod.json")))
            self.assertTrue(
                os.path.isfile(os.path.join(bdatadir, "nerdm.json")))
            self.assertGreater(len(os.listdir(bdatadir)), 2)

        finally:
            if os.path.exists(destzip):
                os.remove(destzip)
            if os.path.exists(cached):
                os.remove(cached)
            if os.path.exists(rmmrec):
                os.remove(rmmrec)
Example #6
    def test_small_revision(self):
        # test creating small update to an existing dataset
        shutil.copytree(self.sipdata, os.path.join(self.revdir, "1491"))
        shutil.rmtree(os.path.join(self.revdir, "1491", "trial3"))
        self.sip = sip.MIDASSIPHandler(self.midasid, self.config)

        srczip = os.path.join(distarchdir, "1491.1_0.mbag0_4-0.zip")
        destzip = os.path.join(distarchive,
                               self.midasid + ".1_0.mbag0_4-0.zip")
        cached = os.path.join(self.pubcache, os.path.basename(destzip))
        rmmrec = os.path.join(mdarchive, self.midasid + ".json")

        try:
            shutil.copyfile(srczip, destzip)
            shutil.copy(os.path.join(datadir, self.midasid + ".json"),
                        mdarchive)

            try:
                self.sip.bagit()
            except AIPValidationError as ex:
                self.fail(ex.description)

            self.assertTrue(
                os.path.exists(
                    os.path.join(self.store,
                                 self.midasid + ".1_1_0.mbag0_4-1.zip")))
            self.assertTrue(not os.path.exists(
                os.path.join(self.store, self.midasid +
                             ".1_1_0.mbag0_4-2.zip")))

            csumfile = os.path.join(
                self.store, self.midasid + ".1_1_0.mbag0_4-1.zip.sha256")
            self.assertTrue(os.path.exists(csumfile))
            with open(csumfile) as fd:
                csum = fd.read().strip()

            self.assertEqual(self.sip.state, status.SUCCESSFUL)
            self.assertIn('bagfiles', self.sip.status)
            self.assertEqual(len(self.sip.status['bagfiles']), 1)
            self.assertEqual(self.sip.status['bagfiles'][0]['name'],
                             self.midasid + ".1_1_0.mbag0_4-1.zip")
            self.assertEqual(self.sip.status['bagfiles'][0]['sha256'], csum)

            # check for checksum files in review dir
            cf = os.path.join(self.revdir, "1491/_preserv",
                              self.midasid + "_1.sha256")
            self.assertTrue(os.path.exists(cf), "Does not exist: " + cf)

            # check contents of revision
            bagdir = os.path.join(self.store,
                                  self.midasid + ".1_1_0.mbag0_4-1")
            unzip(bagdir + ".zip")
            bdatadir = os.path.join(bagdir, "data")
            self.assertTrue(os.path.isdir(bdatadir))
            self.assertGreater(len(os.listdir(bdatadir)), 1)
            bdatadir = os.path.join(bagdir, "metadata")
            self.assertTrue(os.path.isdir(bdatadir))
            self.assertTrue(os.path.isfile(os.path.join(bdatadir, "pod.json")))
            self.assertTrue(
                os.path.isfile(os.path.join(bdatadir, "nerdm.json")))
            self.assertGreater(len(os.listdir(bdatadir)), 2)

            with open(os.path.join(bagdir, "multibag",
                                   "member-bags.tsv")) as fd:
                members = [l.strip().split('\t')[0] for l in fd.readlines()]
            self.assertEqual(members, [
                self.midasid + ".1_0.mbag0_4-0",
                self.midasid + ".1_1_0.mbag0_4-1"
            ])

        finally:
            if os.path.exists(destzip):
                os.remove(destzip)
            if os.path.exists(cached):
                os.remove(cached)
            if os.path.exists(rmmrec):
                os.remove(rmmrec)
def main(argv):
    global log

    # process the command-line options
    prog = os.path.splitext(os.path.basename(argv[0]))[0]
    parser = define_options(prog)
    opts = parser.parse_args(argv[1:])

    if opts.isdir and opts.isediid:
        raise CLIError("-D and -E options are incompatible", 1)

    cfg = load_config(opts.cfgfile, opts.multisipcfg)

    if opts.workdir:
        cfg['working_dir'] = opts.workdir
    if 'working_dir' not in cfg:
        cfg['working_dir'] = "_" + prog + str(os.getpid())
    cfg['working_dir'] = os.path.abspath(cfg['working_dir'])

    if opts.revdir:
        cfg['review_dir'] = os.path.abspath(opts.revdir)
    if opts.storedir:
        cfg['store_dir'] = os.path.abspath(opts.storedir)
    if opts.logfile:
        cfg['logfile'] = os.path.abspath(opts.logfile)

    if 'store_dir' not in cfg:
        cfg['store_dir'] = os.path.join(cfg['working_dir'], 'store')
        if os.path.exists(cfg['working_dir']) and \
           not os.path.exists(cfg['store_dir']):
            os.mkdir(cfg['store_dir'])

    termfmt = False
    if not opts.quiet:
        termfmt = "{0}: %(levelname)s: %(message)s".format(prog)
    config.configure_log(config=cfg, addstderr=termfmt)
    log = logging.getLogger()
    log.info("Using working dir: %s", cfg['working_dir'])

    sipdirname = None
    if opts.isdir or not opts.isediid:
        if opts.isdir:
            # ID is actually the directory containing the SIP
            if not os.path.isdir(opts.sipid[0]):
                raise CLIError("%{0}: not an existing directory".format(
                    opts.sipid[0]))

            sipdir = os.path.abspath(opts.sipid[0])
            cfg['review_dir'] = os.path.dirname(sipdir)
            sipdirname = os.path.basename(sipdir)

        else:
            # ID is a MIDAS record number
            try:
                sipdir = os.path.join(cfg['review_dir'], opts.sipid[0])
                if not os.path.isdir(sipdir):
                    raise CLIError(
                        "{0}: not an existing directory".format(sipdir), 2)
                sipdirname = opts.sipid[0]
            except KeyError as ex:
                raise CLIError("Review directory not specified", 1)

        # in either of these cases, we need to look up the EDI ID from the
        # POD record
        try:
            podfile = os.path.join(sipdir, "_pod.json")
            pod = utils.read_pod(podfile)
            opts.sipid[0] = pod['identifier']
        except KeyError as ex:
            raise CLIError("POD record is missing 'identifier' property", 2)
        except Exception as ex:
            raise CLIError(str(ex), 2)

    minter = get_minter(cfg)

    # start preservation
    try:
        hndlr = sip.MIDASSIPHandler(opts.sipid[0],
                                    cfg,
                                    minter=minter,
                                    sipdirname=sipdirname)
        hndlr.bagit()
    except ConfigurationException as ex:
        raise CLIError(str(ex), 1)
    except PDRException as ex:
        raise CLIError(str(ex), 3)
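
main() signals user-facing failures by raising CLIError with a message and an exit code rather than exiting directly. A minimal sketch of a script entry point around it, assuming CLIError is the exception class used above; exiting with a flat status of 1 is a simplification rather than a reuse of the per-error codes passed to CLIError:

import sys

if __name__ == '__main__':
    try:
        main(sys.argv)
        sys.exit(0)
    except CLIError as ex:
        # Report the failure on stderr and exit non-zero.
        sys.stderr.write("{0}: {1}\n".format(os.path.basename(sys.argv[0]), str(ex)))
        sys.exit(1)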