def test_get_document(self):
        # (docid, content) pairs that are archived and then read back
        expected_docs = (
            ('000000000', 'this is file 000000000'),
            ('000000002', 'this is file 000000002'),
        )
        _add_documents(list(expected_docs))

        # every archived document must come back with exactly its content
        for docid, content in expected_docs:
            doc_fp = docarchive.get_document(docid)
            self.assertEqual(doc_fp.read(), content)
def main(argv):

    if len(argv) < 3:
        print __doc__
        sys.exit(-1)

    option = argv[1]
    path_or_id = argv[2]

    fp = None
    try:
        if os.path.exists(path_or_id):
            fp = file(path_or_id, 'rb')
        else:
            path_or_id = ('000000000' + path_or_id)[-9:]
            fp = docarchive.get_document(path_or_id)

        if option == '-s':
            stripTags(fp, sys.stdout)
        elif option == '-r':
            render(fp, sys.stdout)
        else:
            print __doc__
            sys.exit(-1)

    finally:
        if fp: fp.close()
 def highlight(self, analyzer, highlighter):
     # Fetches the archived document for self.docid; a failed lookup is
     # logged instead of propagated.
     # NOTE(review): only the opening of this method is visible here; how
     # analyzer, highlighter and maxNumFragmentsRequired are used should be
     # confirmed against the rest of the body.
     maxNumFragmentsRequired = 2
     try:
         fp = docarchive.get_document(self.docid)
     except Exception, e:
         # The index may be stale and refer to a document that no longer
         # exists in the archive.
         log.exception('Unable to get "%s"' % self.docid)
    def test_add_document(self):
        handler = docarchive.ArchiveHandler('w')

        # the first document opens the initial archive file
        handler.add_document('000000000', StringIO.StringIO('this is doc 000000000'))
        first_zfile = handler.zfile
        first_arc_path = handler.arc_path

        # a second document in the same range: 000000.zip must remain open
        handler.add_document('000000001', StringIO.StringIO('this is doc 000000001'))
        self.assert_(first_zfile == handler.zfile)
        self.assert_(first_arc_path == handler.arc_path)

        # a document in the next range: the handler must have switched
        # away from 000000.zip
        handler.add_document('000001001', StringIO.StringIO('this is doc 000001001'))
        self.assert_(first_zfile != handler.zfile)
        self.assert_(first_arc_path != handler.arc_path)

        handler.close()

        # both zip files must have been created under the archive path
        for zip_name in ('000000.zip', '000001.zip'):
            self.assert_(os.path.exists(os.path.join(self.apath, zip_name)))

        # a stored document reads back with its original content
        stored = docarchive.get_document('000000001')
        self.assertEqual(stored.read(), 'this is doc 000000001')
def main(argv):

    if len(argv) < 3:
        print __doc__
        sys.exit(-1)

    from minds import proxy
    proxy.init(proxy.CONFIG_FILENAME)       # config to read the actual archive

    option = argv[1]
    path_or_id = argv[2]

    fp = None
    try:
        if os.path.exists(path_or_id):
            fp = file(path_or_id, 'rb')
        else:
            fp = docarchive.get_document(path_or_id)

        if option == '-s':
            stripTags(fp, sys.stdout)
        elif option == '-r':
            render(fp, sys.stdout)
        else:
            print __doc__
            sys.exit(-1)

    finally:
        if fp: fp.close()
def main(rfile, wfile, env):
    """Write the rendered archived document named by the 'docid' form field.

    rfile -- request input, handed to cgi.FieldStorage as its fp
    wfile -- output stream; receives the response header and rendered body
    env   -- environment mapping handed to cgi.FieldStorage

    The document handle obtained from the archive is closed once rendering
    finishes, including when rendering raises. Exceptions from
    docarchive.get_document or distillparse.render still propagate to the
    caller.
    """
    form = cgi.FieldStorage(fp=rfile, environ=env)

    docid = form.getvalue('docid','')
    if len(docid) != 9:
        pass                                            # todo: 404

    # The header is written before the lookup, so a failed lookup happens
    # after the response header has already been sent.
    wfile.write(
"""Content-type: text/html; charset=UTF-8\r
Cache-control: no-cache\r
\r
""")

    fp = docarchive.get_document(docid)
    try:
        distillparse.render(fp, wfile)                  # todo: except 404
    finally:
        # previously the handle was never closed; release it on every path
        fp.close()
 def _check_archive_doc(self, docid, *signatures):
     # Fetching the document doubles as the existence check: an exception
     # from get_document fails the test.
     head = docarchive.get_document(docid).read(1024)
     # every signature has to occur within the first 1024 bytes read
     for signature in signatures:
         position = head.find(signature)
         self.assert_(position >= 0, signature)