Example #1
0
def write(fondz_dir):
    """
    write index.html into fondz_dir, passing render_to the topics, a
    summary of the formats, and the bags read from the JSON files in
    the directory's js subdirectory
    """
    def load(name):
        # every input lives at <fondz_dir>/js/<name>
        return read_json(join(fondz_dir, "js", name))

    context = {
        "topics": load("topics.json"),
        "format_summary": summarize(load("formats.json")),
        "bag_summary": load("bags.json"),
    }
    render_to('index.html', join(fondz_dir, "index.html"), **context)
Example #2
0
def _add_topics(fondz_dir):
    """
    compute the topics for fondz_dir and save them under the
    'topic_model' key of the fondz description file
    """
    logging.getLogger("fondz").info(
        "doing topic modeling for %s", fondz_dir)

    description_path = _fondz_file(fondz_dir)
    description = read_json(description_path)

    # the description is loaded before the topics are computed, then
    # written back to the same file it came from
    model = topics(fondz_dir)
    description['topic_model'] = model
    write_json(description, description_path)
Example #3
0
def _add_topics(fondz_dir):
    """
    compute the topics for fondz_dir and save them under the
    'topic_model' key of the fondz description file
    """
    log = logging.getLogger("fondz")
    log.info("doing topic modeling for %s", fondz_dir)

    json_path = _fondz_file(fondz_dir)
    data = read_json(json_path)

    # replace (or add) the topic model, then persist to the same file
    topic_model = topics(fondz_dir)
    data['topic_model'] = topic_model
    write_json(data, json_path)
Example #4
0
def write(fondz_dir):
    """
    load the fondz description for fondz_dir and call each of the
    writers (static, index, topics, bags, formats) in turn
    """
    _setup_logging(fondz_dir)
    logging.getLogger("fondz").info(
        "writing fondz description for %s", fondz_dir)

    description = read_json(_fondz_file(fondz_dir))

    # _write_static only takes the directory; the page writers below
    # also receive the loaded description
    _write_static(fondz_dir)
    page_writers = (
        _write_index_html,
        _write_topics_html,
        _write_bags_html,
        _write_formats_html,
    )
    for write_page in page_writers:
        write_page(fondz_dir, description)
Example #5
0
def write(fondz_dir):
    """
    load the fondz description for fondz_dir and call each of the
    writers (static, index, topics, bags, formats) in turn
    """
    _setup_logging(fondz_dir)

    log = logging.getLogger("fondz")
    log.info("writing fondz description for %s", fondz_dir)

    data = read_json(_fondz_file(fondz_dir))

    # _write_static only takes the directory; every other writer is
    # called with the directory and the loaded description, in this order
    _write_static(fondz_dir)
    for writer in (_write_index_html, _write_topics_html,
                   _write_bags_html, _write_formats_html):
        writer(fondz_dir, data)
Example #6
0
    def test_topics(self):
        """
        create a fondz directory from the test bag and check the
        'topic_model' entry written to its fondz.json
        """
        fondz_dir = tempfile.mkdtemp()
        # registered up front so the temporary directory is removed even
        # when create() or one of the assertions below fails
        self.addCleanup(shutil.rmtree, fondz_dir)

        create("test", fondz_dir, bag, overwrite=True)
        fondz_file = join(fondz_dir, 'fondz.json')
        fondz = read_json(fondz_file)
        results = fondz['topic_model']

        # make sure mallet details are present
        self.assertIn('mallet', results)
        self.assertEqual(len(results['mallet']), 2)

        # the actual topics
        topics = results['topics']
        self.assertGreater(len(topics), 0)
        self.assertEqual(len(topics[0]['words']), 15)
        for topic in topics:
            self.assertGreater(len(topic['files']), 0)
            self.assertGreater(len(topic['words']), 0)
            self.assertTrue(topic['score'])
Example #7
0
    def test_topics(self):
        """
        create a fondz directory from the test bag and check the
        'topic_model' entry written to its fondz.json
        """
        fondz_dir = tempfile.mkdtemp()
        # registered up front so the temporary directory is removed even
        # when create() or one of the assertions below fails
        self.addCleanup(shutil.rmtree, fondz_dir)

        create("test", fondz_dir, bag, overwrite=True)
        fondz_file = join(fondz_dir, 'fondz.json')
        fondz = read_json(fondz_file)
        results = fondz['topic_model']

        # make sure mallet details are present
        self.assertIn('mallet', results)
        self.assertEqual(len(results['mallet']), 2)

        # the actual topics
        topics = results['topics']
        self.assertGreater(len(topics), 0)
        self.assertEqual(len(topics[0]['words']), 15)
        for topic in topics:
            self.assertGreater(len(topic['files']), 0)
            self.assertGreater(len(topic['words']), 0)
            self.assertTrue(topic['score'])
Example #8
0
    def test_add_bags(self):
        """
        initialize a fondz directory, add two test bags to it, and check
        the resulting fondz.json and one generated derivative file
        """
        # function-scope import: only needed for the cleanup registered below
        import shutil

        # create a fondz directory, and add 2 test bags to it
        fondz_dir = tempfile.mkdtemp()
        # remove the temporary directory whether the test passes or fails
        self.addCleanup(shutil.rmtree, fondz_dir)
        init("test", fondz_dir)
        add_bag(fondz_dir, bag1)
        add_bag(fondz_dir, bag2)

        # load the fondz.json produced by the calls above
        fondz_json = join(fondz_dir, "fondz.json")
        self.assertTrue(isfile(fondz_json))
        result = read_json(fondz_json)

        # check that bags are there
        self.assertEqual(len(result['bags']), 2)
        self.assertEqual(result['num_files'], 12)
        self.assertEqual(result['bytes'], 7772330)
        self.assertEqual(result['bags'][0]['path'], bag1)
        self.assertEqual(len(result['bags'][0]['manifest']), 4)
        self.assertEqual(result['bags'][1]['path'], bag2)
        self.assertEqual(len(result['bags'][1]['manifest']), 8)

        # look closer at bag1
        self.assertTrue(uuid.UUID(result['bags'][0]['id']))
        f = result['bags'][0]['manifest'][0]
        self.assertEqual(f['path'], 'data/newspaper.jpg')
        self.assertEqual(f['md5'], 'a0471d984e6e82f15da686cebdb38a36')
        self.assertEqual(f['bytes'], 7004510)
        self.assertTrue(f['modified'])
        self.assertTrue(f['created'])
        self.assertEqual(f['format'], 'fmt/43')

        # make sure formats are populated
        self.assertEqual(len(result['formats'].keys()), 5)

        # make sure at least one derivative is there
        bag_id = result['bags'][0]['id']
        deriv = join(fondz_dir, 'derivatives', bag_id, 'word.doc.html')
        self.assertTrue(isfile(deriv))
Example #9
0
    def test_create(self):
        """
        create a fondz directory from bag1 and check fondz.json, the
        derivative files and the generated html pages
        """
        # function-scope import: only needed for the cleanup registered below
        import shutil

        d = tempfile.mkdtemp()
        # remove the temporary directory whether the test passes or fails
        self.addCleanup(shutil.rmtree, d)
        create("test", d, bag1, overwrite=True)
        self.assertTrue(isdir(d))

        # fondz.json there?
        fondz_file = join(d, "fondz.json")
        self.assertTrue(isfile(fondz_file))
        fondz = read_json(fondz_file)
        bag_id = fondz["bags"][0]["id"]

        # topics there
        self.assertGreater(len(fondz["topic_model"]["topics"]), 0)

        # bag is there
        self.assertEqual(len(fondz['bags']), 1)
        self.assertEqual(fondz['bags'][0]['path'], bag1)
        self.assertEqual(len(fondz['bags'][0]['manifest']), 4)

        f = fondz['bags'][0]['manifest'][0]
        self.assertEqual(f['path'], 'data/newspaper.jpg')
        self.assertEqual(f['bytes'], 7004510)
        self.assertEqual(f['format'], 'fmt/43')

        f = fondz['bags'][0]['manifest'][1]
        self.assertEqual(f['path'], 'data/wordperfect.wp')
        self.assertEqual(f['bytes'], 672936)
        self.assertEqual(f['format'], 'x-fmt/394')

        # derivatives there?
        deriv_dir = join(d, "derivatives", bag_id)
        self.assertTrue(isfile(join(deriv_dir, "word.doc.html")))
        self.assertTrue(isfile(join(deriv_dir, "wordperfect.wp.html")))
        self.assertTrue(isfile(join(deriv_dir, "subdir", "word.docx.html")))

        # html files there?
        for name in ["index.html", "topics.html", "formats.html", "bags.html"]:
            html_file = join(d, name)
            self.assertTrue(isfile(html_file))
            # close the file promptly instead of leaving it to the
            # garbage collector
            with open(html_file) as fh:
                html = fh.read()
            self.assertIn('<!doctype html>', html)
Example #10
0
def add_bag(fondz_dir, bag_dir):
    """
    add the bagit directory bag_dir to the fondz description kept for
    fondz_dir; raises Exception when a bag with the same absolute path
    is already listed
    """
    # TODO: validate bag first?
    logging.getLogger("fondz").info(
        "adding bag %s to %s", bag_dir, fondz_dir)

    bag_dir = abspath(bag_dir)
    description_path = _fondz_file(fondz_dir)
    description = read_json(description_path)

    # bags are identified by their absolute path
    if any(known["path"] == bag_dir for known in description["bags"]):
        raise Exception("bag was already added: %s" % bag_dir)

    new_bag = _get_bag(bag_dir)
    _add_formats(new_bag, description, fondz_dir)
    _convert(new_bag, fondz_dir)

    # record the bag and fold its size into the running totals
    description["bags"].append(new_bag)
    description["bytes"] += new_bag["bytes"]
    description["num_files"] += len(new_bag["manifest"])

    write_json(description, description_path)
Example #11
0
def add_bag(fondz_dir, bag_dir):
    """
    add the bagit directory bag_dir to the fondz description kept for
    fondz_dir; raises Exception when a bag with the same absolute path
    is already listed
    """
    # TODO: validate bag first?
    log = logging.getLogger("fondz")
    log.info("adding bag %s to %s", bag_dir, fondz_dir)

    bag_dir = abspath(bag_dir)
    json_path = _fondz_file(fondz_dir)
    data = read_json(json_path)

    # bags are identified by their absolute path
    known_paths = [entry["path"] for entry in data["bags"]]
    if bag_dir in known_paths:
        raise Exception("bag was already added: %s" % bag_dir)

    added = _get_bag(bag_dir)
    _add_formats(added, data, fondz_dir)
    _convert(added, fondz_dir)

    # record the bag and fold its size into the running totals
    data["bags"].append(added)
    data["bytes"] += added["bytes"]
    data["num_files"] += len(added["manifest"])

    write_json(data, json_path)