Exemple #1
0
 def test_failed_transaction(self):
     """A failing update_file call must leave ``r.contents`` unchanged.

     Copies the lactobacillus2 fixture into a temporary directory,
     snapshots the manifest, triggers an update with a nonexistent
     source file and checks the manifest still equals the snapshot.
     """
     with config.tempdir() as d:
         rpkg = os.path.join(d, 'test.refpkg')
         shutil.copytree(config.data_path(
             'lactobacillus2-0.2.refpkg'), rpkg)
         # The package is opened once; the original opened it twice in a row.
         r = refpkg.Refpkg(rpkg, create=False)
         v = copy.deepcopy(r.contents)
         self.assertRaises(
             Exception, r.update_file, 'tiddlywinks', '/path/to/nonexistant/thing')
         self.assertEqual(v, r.contents)
Exemple #2
0
def action(args):
    """Roll back commands on a refpkg.

    *args* should be an argparse object with fields refpkg (giving the
    path to the refpkg to operate on) and n (giving the number of
    operations to roll back).

    Returns 0 on success. Returns 1, after logging an error and without
    rolling anything back, when the refpkg records fewer than n changes.
    """
    log.info('loading reference package')

    r = refpkg.Refpkg(args.refpkg, create=False)

    # First check if we can do n rollbacks: walk the chain of nested
    # 'rollback' entries so that either all n are applied or none are.
    q = r.contents
    for i in range(args.n):
        if q['rollback'] is None:
            log.error(
                'Cannot rollback %d changes; refpkg only records %d changes.',
                args.n, i)
            return 1
        q = q['rollback']

    for _ in range(args.n):
        r.rollback()

    return 0
Exemple #3
0
def action(args):
    """Roll forward previously rolled back commands on a refpkg.

    *args* should be an argparse object with fields refpkg (giving the
    path to the refpkg to operate on) and n (giving the number of
    operations to roll forward).

    Returns 0 on success. Returns 1, after logging an error and without
    rolling anything forward, when fewer than n rolled back changes are
    recorded.
    """
    log.info('loading reference package')

    r = refpkg.Refpkg(args.refpkg, create=False)

    # First check if we can do n rollforwards. Each 'rollforward' entry
    # is indexed with [1] to reach the next state in the chain.
    q = r.contents
    for i in range(args.n):
        if q['rollforward'] is None:
            log.error(
                'Cannot rollforward {} changes; '
                'refpkg only records {} rolled back changes.'.format(args.n, i))
            return 1
        q = q['rollforward'][1]

    for _ in range(args.n):
        r.rollforward()
    return 0
Exemple #4
0
def action(args):
    """Write a CSV tally of sequences per taxon at rank ``args.rank``.

    The taxonomy and seq_info files come from the refpkg when
    ``args.refpkg`` is given, otherwise from ``args.taxonomy`` and
    ``args.seq_info`` (both required in that case; the program exits
    with an error message if either is None). Rows of
    (tax_name, tax_id, count) are written, sorted, to ``args.outfile``.
    """
    if not args.refpkg:
        taxonomy = args.taxonomy
        seq_info = args.seq_info

        if taxonomy is None or seq_info is None:
            sys.exit('Error: --taxonomy and --seq-info are '
                     'required if refpkg is not provided.')
    else:
        log.info('loading reference package')
        pkg = refpkg.Refpkg(args.refpkg, create=False)
        taxonomy = pkg.file_abspath('taxonomy')
        seq_info = pkg.file_abspath('seq_info')

    # Index taxonomy rows by their tax_id.
    taxdict = {}
    with open(taxonomy, 'rU') as tax_handle:
        for record in csv.DictReader(tax_handle):
            taxdict[record['tax_id']] = record

    unclassified = '<unclassified at this rank>'
    counts = Counter()
    with open(seq_info, 'rU') as info_handle:
        for row in csv.DictReader(info_handle):
            # tax_id at the specified rank ('' when the sequence has none)
            if row['tax_id']:
                rank_id = taxdict[row['tax_id']][args.rank]
            else:
                rank_id = ''

            if rank_id:
                rank_name = taxdict[rank_id]['tax_name']
            else:
                rank_name = unclassified

            counts[(rank_name, rank_id)] += 1

    writer = csv.writer(args.outfile)
    writer.writerow(['tax_name', 'tax_id', 'count'])
    tally = [(name, tid, total) for (name, tid), total in counts.items()]
    tally.sort()
    writer.writerows(tally)
Exemple #5
0
    def test_update_file(self):
        """Check update_file return values, manifest sync and name clashes.

        Adds the same source file under two keys, then replaces the
        first key with a different file.
        """
        workdir = tempfile.mkdtemp()
        try:
            package = refpkg.Refpkg(
                os.path.join(workdir, 'test.refpkg'), create=True)
            source = config.data_path('bv_refdata.csv')
            source_name = 'bv_refdata.csv'
            with open(source, 'rb') as handle:
                source_md5 = refpkg.md5file(handle)

            # A new key has no previous file, so None is returned.
            previous = package.update_file('a', source)
            self.assertEqual(None, previous)

            # Make sure it's properly written
            manifest = os.path.join(package.path, package._manifest_name)
            with open(manifest) as handle:
                self.assertEqual(json.load(handle), package.contents)

            self.assertIn('a', package.contents['files'])
            self.assertEqual(package.resource_name('a'), source_name)
            self.assertEqual(package.resource_md5('a'), source_md5)

            # Same source under a second key: stored under a different
            # name that keeps the stem and extension.
            previous = package.update_file('b', source)
            self.assertEqual(None, previous)
            self.assertNotEqual(package.resource_name('b'), source_name)
            self.assertTrue(
                package.resource_name('b').startswith('bv_refdata'))
            self.assertTrue(package.resource_name('b').endswith('.csv'))
            self.assertEqual(package.resource_md5('b'), source_md5)

            replacement = config.data_path('taxids1.txt')

            # Replacing 'a' returns the old path; the old file stays on disk.
            path_before = package.resource_path('a')
            self.assertEqual(path_before,
                             package.update_file('a', replacement))
            self.assertTrue(
                os.path.exists(os.path.join(package.path, source_name)))
        finally:
            shutil.rmtree(workdir)
    def test_rollforward(self):
        """Exercise rollforward.action after two rolled back metadata updates.

        Asking for three rollforwards must return 1 and leave the
        metadata at its original value; asking for two must return 0
        and restore the updated metadata.
        """
        with config.tempdir() as workdir:
            pkg_dir = os.path.join(workdir, 'tostrip.refpkg')
            shutil.copytree(config.data_path('lactobacillus2-0.2.refpkg'),
                            pkg_dir)
            package = refpkg.Refpkg(pkg_dir, create=False)
            before = copy.deepcopy(package.contents)
            package.update_metadata('boris', 'hilda')
            package.update_metadata('meep', 'natasha')
            after = copy.deepcopy(package.contents)
            for _ in (1, 2):
                package.rollback()

            # Minimal stand-in for the argparse namespace the action reads.
            class _Args(object):
                refpkg = pkg_dir

                def __init__(self, n):
                    self.n = n

            # Only two rollbacks were made, so three rollforwards must fail.
            status = rollforward.action(_Args(3))
            self.assertEqual(status, 1)
            package._sync_from_disk()
            self.assertEqual(package.contents['metadata'],
                             before['metadata'])

            status = rollforward.action(_Args(2))
            self.assertEqual(status, 0)
            package._sync_from_disk()
            self.assertEqual(package.contents['metadata'],
                             after['metadata'])
            self.assertEqual(package.contents['rollforward'], None)
            self.assertNotEqual(package.contents['rollback'], None)
Exemple #7
0
    def test_transaction(self):
        """Check logging and rollback state for single and grouped updates.

        A lone update_metadata call must log one entry and leave no open
        transaction; several updates inside an explicit transaction must
        add only the explicitly logged message.
        """
        with config.tempdir() as workdir:
            pkg_dir = os.path.join(workdir, 'test.refpkg')
            shutil.copytree(config.data_path('lactobacillus2-0.2.refpkg'),
                            pkg_dir)
            package = refpkg.Refpkg(pkg_dir, create=False)

            # update_metadata returns the value it replaced.
            replaced = package.update_metadata('author', 'Boris and Hilda')
            self.assertEqual(
                replaced,
                "Noah Hoffman <*****@*****.**>, Sujatha Srinivasan <*****@*****.**>, Erick Matsen <*****@*****.**>"
            )
            self.assertEqual(package.current_transaction, None)
            self.assertEqual(package.log(),
                             ['Updated metadata: author=Boris and Hilda'])
            rollback_state = package.contents['rollback']
            self.assertTrue(isinstance(rollback_state, dict))
            self.assertFalse('log' in rollback_state)

            log_before = copy.deepcopy(package.log())
            package.start_transaction()
            package.update_metadata('boris', 'meep')
            package.update_metadata('hilda', 'vrrp')
            package._log("Meep!")
            package.commit_transaction()

            # The rollback snapshot must predate both keys set above.
            rollback_metadata = package.contents['rollback']['metadata']
            self.assertFalse('boris' in rollback_metadata)
            self.assertFalse('hilda' in rollback_metadata)
            self.assertEqual(package.log(), ["Meep!"] + log_before)
Exemple #8
0
 def test_pretend_reroot(self):
     """After reroot(pretend=True) the 'tree' md5 must match the pinned value."""
     with config.tempdir() as workdir:
         pkg_dir = os.path.join(workdir, 'reroot.refpkg')
         fixture = config.data_path('lactobacillus2-0.2.refpkg')
         shutil.copytree(fixture, pkg_dir)

         package = refpkg.Refpkg(pkg_dir, create=False)
         package.reroot(pretend=True)

         expected_md5 = '2f11faa616fc7f04d7694436b5cca05f'
         self.assertEqual(expected_md5, package.file_md5('tree'))
Exemple #9
0
def action(args):
    """Strip non-current files and rollback information from a refpkg.

    *args* should be an argparse object with a refpkg field giving the
    path to the refpkg to operate on.
    """
    log.info('loading reference package')

    package = refpkg.Refpkg(args.refpkg, create=False)
    package.strip()
Exemple #10
0
 def test_is_ill_formed(self):
     """is_ill_formed is falsy for the fixture and a str after a bad update."""
     with config.tempdir() as workdir:
         pkg_dir = os.path.join(workdir, 'test.refpkg')
         fixture = config.data_path('lactobacillus2-0.2.refpkg')
         shutil.copytree(fixture, pkg_dir)

         package = refpkg.Refpkg(pkg_dir, create=False)
         self.assertFalse(package.is_ill_formed())

         # Swap the alignment for an unrelated fasta file.
         package.update_file('aln_fasta', config.data_path('little.fasta'))
         verdict = package.is_ill_formed()
         self.assertTrue(isinstance(verdict, str))
Exemple #11
0
def action(args):
    """Updates a Refpkg with new files or metadata.

    *args* should be an argparse object with fields refpkg (giving the
    path to the refpkg to operate on), changes (a series of strings
    of the form 'key=file' giving the key to update in the refpkg and
    the file to store under that key) and metadata (when true, each
    change is treated as 'key=value' metadata instead of a file).
    stats_type and frequency_type are read when the 'tree_stats' key
    is updated.

    Raises ValueError if a change is not of the form 'key=value'.
    Prints a message and exits with status 1 if a named file does not
    exist. Returns 0 on success.
    """
    # Local import: sys.exit replaces the interactive-only builtin exit().
    import sys

    log.info('loading reference package')

    pairs = [p.split('=', 1) for p in args.changes]
    # Reject malformed changes before the refpkg is opened, so a bad
    # argument cannot leave a transaction half applied.
    for change, pair in zip(args.changes, pairs):
        if len(pair) != 2:
            raise ValueError(
                "Expected a change of the form key=value, got: %r" % change)

    summary = ', '.join(['%s=%s' % (a, b) for a, b in pairs])

    if args.metadata:
        rp = refpkg.Refpkg(args.refpkg, create=False)
        rp.start_transaction()
        for key, value in pairs:
            rp.update_metadata(key, value)
        rp.commit_transaction('Updated metadata: ' + summary)
    else:
        # Check every file exists before touching the refpkg.
        for key, filename in pairs:
            if not os.path.exists(filename):
                print("No such file: %s" % filename)
                sys.exit(1)

        rp = refpkg.Refpkg(args.refpkg, create=False)
        rp.start_transaction()
        for key, filename in pairs:
            if key == 'tree_stats':
                # Silence the derived-file warning: phylo_model is
                # regenerated immediately below.
                with warnings.catch_warnings():
                    warnings.simplefilter(
                        "ignore", refpkg.DerivedFileNotUpdatedWarning)
                    rp.update_file(key, os.path.abspath(filename))
                # Trigger model update
                log.info('Updating phylo_model to match tree_stats')
                rp.update_phylo_model(args.stats_type, filename,
                                      args.frequency_type)
            else:
                rp.update_file(key, os.path.abspath(filename))

        rp.commit_transaction('Updates files: ' + summary)
    return 0
Exemple #12
0
 def test_create(self):
     """Creating a Refpkg at a fresh path yields the template manifest only."""
     # Attaching to an empty directory should create a new, empty Refpkg.
     workdir = tempfile.mkdtemp()
     try:
         pkg_dir = os.path.join(workdir, 'test.refpkg')
         package = refpkg.Refpkg(pkg_dir, create=True)
         self.assertEqual(package.contents, refpkg.manifest_template())
         listing = os.listdir(pkg_dir)
         self.assertEqual(listing, ['CONTENTS.json'])
     finally:
         shutil.rmtree(workdir)
Exemple #13
0
 def test_reroot(self):
     """reroot() must produce the pinned 'tree' md5 and a single log entry."""
     with config.tempdir() as workdir:
         pkg_dir = os.path.join(workdir, 'reroot.refpkg')
         fixture = config.data_path('lactobacillus2-0.2.refpkg')
         shutil.copytree(fixture, pkg_dir)

         package = refpkg.Refpkg(pkg_dir, create=False)
         package.reroot()

         expected_md5 = '9bdbf22f8bf140074d126f3d27989100'
         self.assertEqual(expected_md5, package.file_md5('tree'))
         self.assertEqual(package.log(), ['Rerooting refpkg'])
Exemple #14
0
    def _test_create_phylo_model(self, stats_path, stats_type=None,
                                 frequency_type=None):
        """Run create.action with tree stats and check 'phylo_model' is a file key.

        Shared helper: *stats_path*, *stats_type* and *frequency_type*
        are copied onto the arguments object passed to create.action.
        """
        with config.tempdir() as workdir:
            opts = self._Args(workdir)
            opts.tree_stats = stats_path
            opts.stats_type = stats_type
            opts.frequency_type = frequency_type
            create.action(opts)

            package = refpkg.Refpkg(opts.package_name, create=False)
            files = package.contents['files']
            self.assertIn('phylo_model', files)
 def test_metadata_action(self):
     """update.action with metadata=True must store each key=value pair."""
     with config.tempdir() as workdir:
         pkg_dir = os.path.join(workdir, 'test.refpkg')
         package = refpkg.Refpkg(pkg_dir, create=True)

         self.args.changes = ['meep=boris', 'hilda=vrrp']
         self.args.metadata = True
         self.args.refpkg = pkg_dir
         update.action(self.args)

         # The action used its own Refpkg object; reload ours from disk.
         package._sync_from_disk()
         for key, expected in (('meep', 'boris'), ('hilda', 'vrrp')):
             self.assertEqual(package.metadata(key), expected)
Exemple #16
0
def action(args):
    """Create a new refpkg at ``args.package_name`` from the given inputs.

    If the path already exists it is deleted when ``args.clobber`` is
    true; otherwise the function fails. Metadata (locus, description,
    author, package_version) and any of the known file arguments that
    are set are loaded in one transaction, after which the package is
    stripped. When ``args.reroot`` is set together with taxonomy,
    seq_info and tree, the package is rerooted if
    ``utils.has_rppr(args.rppr)`` is true.

    Returns 0 on success, or 1 (after writing a message to stderr) when
    the target exists and cannot or may not be removed.
    """
    if os.path.exists(args.package_name):
        if not args.clobber:
            sys.stderr.write(
                'Failed: {0} exists.\n'.format(args.package_name))
            return 1
        try:
            if os.path.isdir(args.package_name):
                shutil.rmtree(args.package_name)
            else:
                os.unlink(args.package_name)
        except OSError:
            # Only filesystem errors are reported here; KeyboardInterrupt
            # and the like are no longer swallowed.
            sys.stderr.write(
                "Failed: Could not delete %s\n" % args.package_name)
            return 1

    r = refpkg.Refpkg(args.package_name, create=True)
    r.start_transaction()
    r.update_metadata('locus', args.locus)  # Locus is required
    if args.description:
        r.update_metadata('description', args.description)
    if args.author:
        r.update_metadata('author', args.author)
    if args.package_version:
        r.update_metadata('package_version', args.package_version)
    if args.tree_stats:
        r.update_phylo_model(args.stats_type,
                             args.tree_stats,
                             frequency_type=args.frequency_type)

    for file_name in [
            'aln_fasta', 'aln_sto', 'mask', 'profile', 'seq_info', 'taxonomy',
            'tree', 'tree_stats', 'readme'
    ]:
        path = getattr(args, file_name)
        if path:
            r.update_file(file_name, path)
    r._log('Loaded initial files into empty refpkg')
    r.commit_transaction()
    r.strip()

    reroot_prereqs = args.reroot and args.taxonomy and args.seq_info and args.tree
    if utils.has_rppr(args.rppr) and reroot_prereqs:
        r.start_transaction()
        log.info('%s found. Rerooting.', args.rppr)
        r.reroot(rppr=args.rppr)
        r._log('Rerooted')
        r.commit_transaction()
    elif reroot_prereqs:
        # Logger.warn is a deprecated alias of Logger.warning.
        log.warning('"%s" not found. Skipping rerooting', args.rppr)

    return 0
Exemple #17
0
 def test_update_metadata(self):
     """update_metadata returns the prior value and syncs the manifest."""
     workdir = tempfile.mkdtemp()
     try:
         package = refpkg.Refpkg(
             os.path.join(workdir, 'test.refpkg'), create=True)

         # First write has no prior value; the second returns the first.
         self.assertEqual(package.update_metadata('a', 'boris'), None)
         self.assertEqual(package.update_metadata('a', 'meep'), 'boris')

         manifest = os.path.join(package.path, package._manifest_name)
         with open(manifest) as handle:
             self.assertEqual(json.load(handle), package.contents)

         self.assertEqual(package.metadata('a'), 'meep')
         self.assertEqual(package.metadata('b'), None)
     finally:
         shutil.rmtree(workdir)
 def test_create(self):
     """create.action stores the metadata; a clobbering second run returns 0."""
     with config.tempdir() as workdir:
         opts = self._Args(workdir)
         create.action(opts)

         package = refpkg.Refpkg(opts.package_name, create=False)
         expected_metadata = (
             ('locus', 'Nowhere'),
             ('description', 'A description'),
             ('author', 'Boris the Mad Baboon'),
             ('package_version', '0.3'),
             ('format_version', '1.1'),
         )
         for key, value in expected_metadata:
             self.assertEqual(package.metadata(key), value)
         self.assertEqual(package.contents['rollback'], None)

         # Run again against test.refpkg with clobber enabled.
         clobber_opts = self._Args(workdir)
         clobber_opts.package_name = os.path.join(workdir, 'test.refpkg')
         clobber_opts.clobber = True
         self.assertEqual(0, create.action(clobber_opts))
Exemple #19
0
    def test_strip(self):
        """strip.action must clear both 'rollback' and 'rollforward'."""
        with config.tempdir() as workdir:
            pkg_dir = os.path.join(workdir, 'tostrip.refpkg')
            shutil.copytree(data_path('lactobacillus2-0.2.refpkg'), pkg_dir)
            package = refpkg.Refpkg(pkg_dir, create=False)
            # Two updates so there is history to strip.
            package.update_metadata('boris', 'hilda')
            package.update_metadata('meep', 'natasha')

            # Minimal stand-in for the argparse namespace the action reads.
            class _Args(object):
                refpkg = pkg_dir

            strip.action(_Args())

            package._sync_from_disk()
            for key in ('rollback', 'rollforward'):
                self.assertEqual(package.contents[key], None)
    def test_update_stats_action(self):
        """Updating 'tree_stats' must also add a '.json' phylo_model file."""
        with config.tempdir() as workdir:
            pkg_dir = os.path.join(workdir, 'test.refpkg')
            package = refpkg.Refpkg(pkg_dir, create=True)

            opts = self.args
            stats_file = os.path.join(config.datadir, 'phyml_aa_stats.txt')

            opts.refpkg = pkg_dir
            opts.changes = ['tree_stats=' + stats_file]
            opts.frequency_type = 'empirical'

            update.action(opts)

            # The action used its own Refpkg object; reload ours from disk.
            package._sync_from_disk()

            files = package.contents['files']
            self.assertIn('tree_stats', files)
            self.assertIn('phylo_model', files)
            self.assertTrue(
                package.contents['files']['phylo_model'].endswith('.json'))
    def test_action(self):
        """update.action stores one source file under two keys without a clash."""
        with config.tempdir() as workdir:
            pkg_dir = os.path.join(workdir, 'test.refpkg')
            package = refpkg.Refpkg(pkg_dir, create=True)
            source = config.data_path('bv_refdata.csv')

            self.args.refpkg = pkg_dir
            self.args.changes = ['meep=' + source, 'hilda=' + source]

            update.action(self.args)
            package._sync_from_disk()
            self.assertEqual(package.contents['files']['meep'],
                             'bv_refdata.csv')

            # Second file should have been assigned a non-clashing name
            second_name = package.contents['files']['hilda']
            self.assertNotEqual(second_name, 'bv_refdata.csv')
            self.assertTrue(second_name.startswith('bv_refdata'))
            self.assertTrue(second_name.endswith('.csv'))

            hilda_path = package.resource_path('hilda')
            self.assertTrue(os.path.exists(hilda_path))
Exemple #22
0
def action(args):
    """
    Show information about reference packages.

    Depending on the flags on *args*: prints the sequence names
    (seq_names), calls tally_taxa (tally), calls print_lengths
    (lengths), or by default prints the number of sequences and the
    sorted file keys of the package.
    """
    log.info('loading reference package')

    pkg = refpkg.Refpkg(args.refpkg, create=False)

    # NOTE(review): the 'U' open flag is kept for Python 2 behaviour; it
    # is deprecated on Python 3 and rejected from 3.11 - confirm target.
    with open(pkg.file_abspath('seq_info'), 'rU') as seq_info:
        snames = [row['seqname'] for row in csv.DictReader(seq_info)]

    # Single-argument print(...) emits the same text whether print is a
    # statement (Python 2) or a function (Python 3).
    if args.seq_names:
        print('\n'.join(snames))
    elif args.tally:
        tally_taxa(pkg)
    elif args.lengths:
        print_lengths(pkg)
    else:
        print('number of sequences: %d' % len(snames))
        print('package components\n' + '\n'.join(sorted(pkg.file_keys())))
Exemple #23
0
    def test_strip(self):
        """strip() must delete a rolled back file and log the removal."""
        with config.tempdir() as workdir:
            pkg_dir = os.path.join(workdir, 'test.refpkg')
            fixture = config.data_path('lactobacillus2-0.2.refpkg')
            shutil.copytree(fixture, pkg_dir)
            package = refpkg.Refpkg(pkg_dir, create=False)

            # Add a file, then roll it back so it is no longer current.
            self.assertFalse('boris' in package.contents['files'])
            package.update_file('boris', config.data_path('taxids1.txt'))
            stale_path = package.resource_path('boris')
            package.rollback()
            self.assertFalse('boris' in package.contents['files'])

            log_before = package.log()
            package.strip()

            self.assertFalse('boris' in package.contents['files'])
            expected_log = ['Stripped refpkg (removed 1 files)'] + log_before
            self.assertEqual(package.log(), expected_log)
            self.assertFalse(os.path.exists(stale_path))
            self.assertFalse(package.is_invalid())
            # The directory holds one entry more than the tracked files.
            on_disk = len(os.listdir(package.path))
            self.assertEqual(len(package.contents['files']), on_disk - 1)
Exemple #24
0
def action(args):
    """Write the lonely nodes of a taxonomy as CSV to ``args.output``.

    ``args.target`` is either a refpkg directory, in which case its
    'taxonomy' file is used, or a taxonomy CSV file. When ``args.ranks``
    is set, only nodes whose rank is in it are written. Returns 1 when
    the target does not exist.
    """
    if not os.path.exists(args.target):
        logging.error("Failed: no such target %s", args.target)
        return 1

    if os.path.isdir(args.target):
        logging.info("Target is a refpkg. Working on taxonomy within it.")
        package = refpkg.Refpkg(args.target, create=False)
        path = package.file_abspath('taxonomy')
    else:
        logging.info("Target is a CSV file")
        path = args.target

    logging.info("Loading taxonomy from file.")
    with open(path, 'rU') as handle:
        tree = lonely.taxtable_to_tree(handle)

    nodes = tree.lonelynodes()
    if args.ranks:
        nodes = (node for node in nodes if node.rank in args.ranks)

    writer = csv.writer(args.output)
    writer.writerow(['tax_name', 'tax_id', 'rank'])
    rows = sorted((node.tax_name, node.key, node.rank) for node in nodes)
    writer.writerows(rows)
Exemple #25
0
    def test_rollback(self):
        """Check rollback / rollforward around one committed transaction.

        Covers rollback with no history (ValueError), state after
        commit, state after rollback, rollforward restoring the
        committed state, and rollforward being refused after an
        unrelated operation.
        """
        with config.tempdir() as workdir:
            pkg_dir = os.path.join(workdir, 'test.refpkg')
            fixture = config.data_path('lactobacillus2-0.2.refpkg')
            shutil.copytree(fixture, pkg_dir)
            package = refpkg.Refpkg(pkg_dir, create=False)

            # Nothing has been recorded yet, so rollback must fail.
            self.assertRaises(ValueError, package.rollback)
            self.maxDiff = None
            initial = copy.deepcopy(package.contents)
            self.assertFalse('boris' in package.contents['metadata'])

            package.start_transaction()
            package.update_metadata('boris', 'meep')
            package.update_file('boris', config.data_path('taxids1.txt'))
            package.commit_transaction()
            added_path = package.resource_path('boris')
            self.assertTrue('boris' in package.contents['files'])
            self.assertFalse('boris' in package.contents['rollback']['files'])

            self.assertFalse(
                'boris' in package.contents['rollback']['metadata'])
            self.assertTrue('boris' in package.contents['metadata'])

            committed = copy.deepcopy(package.contents)
            package.rollback()
            self.assertFalse('boris' in package.contents['files'])
            self.assertFalse('boris' in package.contents['md5'])
            # Rollback keeps the file on disk.
            self.assertTrue(os.path.exists(added_path))

            # Compare with the initial state, ignoring 'rollforward'.
            rolled_back = copy.deepcopy(package.contents)
            initial.pop('rollforward')
            rolled_back.pop('rollforward')
            self.assertEqual(initial, rolled_back)

            package.rollforward()
            self.assertEqual(committed, package.contents)

            # We shouldn't be able to roll forward after running an unrelated
            # operation
            package.rollback()
            package.update_metadata('boris', 'hilda')
            self.assertRaises(ValueError, package.rollforward)
Exemple #26
0
def action(args):
    """Reroot the refpkg at ``args.refpkg``.

    ``args.rppr`` and ``args.pretend`` are forwarded to Refpkg.reroot.
    """
    package = refpkg.Refpkg(args.refpkg, create=False)
    package.reroot(rppr=args.rppr, pretend=args.pretend)
Exemple #27
0
def action(args):
    """Write the absolute path of ``args.item`` in the refpkg to stdout.

    Returns 0.
    """
    package = refpkg.Refpkg(args.refpkg, create=False)
    item_path = package.file_abspath(args.item)
    sys.stdout.write('%s\n' % item_path)
    return 0
    return 0