def _add_internal (ostream, percent_done_fn, repo, response, params, content, wait):
    """Add a document to the repository by running the uplib-add-document command.

    The document comes either from `content` (staged into a temporary file,
    or a temporary folder for HTML) or, when there is no content, from the
    "URL" parameter.  After the command succeeds, this waits until
    `repo.valid_doc_id()` accepts the new document id.

    Parameters:
      ostream -- if given, progress, results and errors are reported with
          `_rewrite_job_output(ostream, ...)` rather than through `response`
      percent_done_fn -- optional callable; called with 40 once the command
          has succeeded and with 100 once the waiting loop is over
      repo -- the repository; used for queries, cookies, the pending list,
          the history list and for burying the new document
      response -- used for replies, redirects and errors when `ostream` is
          not given
      params -- mapping of request parameters.  Keys read here: "contenttype",
          "URL", "no-redirect", "documentname", "suppress-duplicates", "bury",
          "verbosity", "md-title", "md-authors", "md-date", "md-categories",
          "metadata"
      content -- the document body, or a false value if only a URL is given
      wait -- when false, the outcome of the add is only logged and not sent
          to the client (a suppressed duplicate is still reported)

    Returns None.  Exceptions raised while building or running the command
    are caught and reported; exceptions raised while staging `content` or
    looking for duplicates propagate to the caller.
    """

    # this can be called in several different ways.
    # In general, you post a multipart/form-data body which
    # contains a "contenttype" for the document, and either a "URL"
    # for the content, or a "content" parameter containing
    # the actual content.  If both "URL" and "content" are present,
    # the URL is added as the "original-url" value for the metadata,
    # and if the content is HTML, it's used as the "original.html"
    # and the URL is used to pull ancillary content referenced in it.

    def flag (name):
        # a boolean parameter is on only when present and spelled "true" (any case)
        value = params.get(name)
        return bool(value and (value.lower() == "true"))

    def discard (path):
        # remove a staged upload (temp file or temp folder) if it still exists
        if not path:
            return
        if os.path.isdir(path):
            shutil.rmtree(path)
        elif os.path.exists(path):
            os.remove(path)

    content_type = params.get("contenttype")
    url = params.get("URL")
    noredir = flag("no-redirect")
    uploadloc = url
    docname = params.get("documentname")
    tempf = None
    mdtmpfile = None
    suppress_duplicates = flag("suppress-duplicates")
    bury = flag("bury")
    verbosity = int(params.get("verbosity") or "0")

    if content:
        if wait and ostream:
            _rewrite_job_output(ostream, '{ state: 0, msg: "Caching page..."}')
        extension = CONTENT_TYPES.get(content_type)
        if not extension:
            if wait:
                msg = "Don't know what to do with contenttype \"%s\"" % content_type
                if ostream:
                    _rewrite_job_output(ostream, '{state: 1, msg: "' + urllib.quote(msg) + '"}')
                else:
                    response.error(HTTPCodes.UNSUPPORTED_MEDIA_TYPE, msg)
            return

        duplicates = None
        try:
            lowered_type = content_type.lower()
            if lowered_type in ("text/html", "application/xhtml+xml"):
                # special case HTML/XHTML: stage into a folder of its own
                tempf = tempfile.mkdtemp()
                uploadloc = os.path.join(tempf, "original.html")
                # make sure that the folder for other parts exists, even if empty
                other_parts = os.path.join(tempf, "original_files")
                os.mkdir(other_parts)
                # remove our bookmarklet, if present
                content = _BOOKMARKLET_PATTERN.sub('', content)
                content = _ADD_FORM_PATTERN.sub('', content)
                # NOTE(review): presumably constructing the cacher is what writes
                # original.html and fetches the ancillary parts -- confirm.  The
                # object stays bound so its lifetime matches the earlier code.
                cacher = _OurCacher(url, filename=uploadloc, bits=content, content_type=content_type)
                # the folder is re-checked after caching, as before
                if not os.path.exists(other_parts):
                    os.mkdir(other_parts)
            else:
                # 3x5 cards arrive as text/plain but must keep their own suffix
                if (docname and (lowered_type == "text/plain")
                    and os.path.splitext(docname)[1] == ".3x5"):
                    suffix = ".3x5"
                else:
                    suffix = "." + extension
                fd, tempf = tempfile.mkstemp(suffix)
                fp = os.fdopen(fd, "wb")
                try:
                    fp.write(content)
                finally:
                    fp.close()
                uploadloc = tempf
            if suppress_duplicates:
                fingerprint = calculate_originals_fingerprint(tempf)
                duplicates = repo.do_query("sha-hash:" + fingerprint)
        except:
            # don't leave the staged upload behind when staging or the lookup fails
            discard(tempf)
            raise

        if duplicates:
            # it's a duplicate; report the existing document instead of adding
            doc = duplicates[0][1]
            discard(tempf)
            if ostream:
                _rewrite_job_output(ostream, '{ state: 2, doc_id: "' + doc.id + '"}')
            elif noredir:
                response.reply(doc.id, "text/plain")
            else:
                response.redirect("/action/basic/dv_show?doc_id=%s" % doc.id)
            return

    try:
        try:
            if not uploadloc:
                # neither a URL nor any content; fail with a clear message rather
                # than handing the literal text "None" to the command
                raise ValueError("No URL or content was supplied for the document to add")
            # get a cookie for authentication
            cookie = repo.new_cookie(url or content[:100])
            cookie_str = '%s=%s; path=/; Secure' % (cookie.name(), cookie.value())
            # the cookie is handed over through the process environment
            os.environ["UPLIB_COOKIE"] = cookie_str
            doctitle = params.get("md-title")
            docauthors = params.get("md-authors")
            docdate = params.get("md-date")
            doccats = params.get("md-categories")
            metadata = params.get("metadata")
            existing_doc_id = None
            if metadata:
                # mkstemp creates the file atomically, unlike the race-prone mktemp
                fd, mdtmpfile = tempfile.mkstemp()
                mdfp = os.fdopen(fd, "w")
                try:
                    mdfp.write(metadata)
                finally:
                    mdfp.close()
                # check to see if we're replacing an existing document
                md2 = read_metadata(StringIO.StringIO(metadata))
                existing_doc_id = md2.get("replacement-contents-for")
                if existing_doc_id and not repo.valid_doc_id(existing_doc_id):
                    raise ValueError("Invalid doc ID %s specified for replacement" % existing_doc_id)
            # now form the command
            scheme = ((repo.get_param("use-http", "false").lower() == "true") or _use_http) and "http" or "https"
            cmd = '%s --verbosity=%s --repository=%s://127.0.0.1:%s ' % (_uplib_add_document, verbosity, scheme, repo.port())
            if doctitle:
                cmd += ' --title=%s' % pipes.quote(doctitle)
            if docauthors:
                cmd += ' --authors=%s' % pipes.quote(docauthors)
            if docdate:
                # request-supplied, so quoted the same way as the other md-* values
                cmd += ' --date=%s' % pipes.quote(docdate)
            if doccats:
                cmd += ' --categories=%s' % pipes.quote(doccats)
            if mdtmpfile:
                cmd += ' --metadata="%s"' % mdtmpfile
            if tempf:
                # a path we generated ourselves
                cmd += ' "%s"' % uploadloc
            else:
                # the URL straight from the request; never trust it inside a shell command
                cmd += ' %s' % pipes.quote(uploadloc)
            if ostream:
                _rewrite_job_output(ostream, '{state: 0, msg: "' + urllib.quote(cmd) + '"}')
            # and invoke the command
            status, output, tsignal = subproc(cmd)
            note(4, "cmd is %s, status is %s, output is %s", repr(cmd), status, repr(output.strip()))
            if mdtmpfile:
                os.unlink(mdtmpfile)
                mdtmpfile = None
            if status == 0:
                # success; output should be doc-id
                doc_id = existing_doc_id or output.strip().split()[-1]
                note(4, "output is '%s'; doc_id for new doc is %s", output.strip(), doc_id)
                if wait and ostream:
                    _rewrite_job_output(ostream, '{ state: 1, doc_id: "' + doc_id + '", msg: "' + urllib.quote(output) + '"}')
                # wait for it to come on-line
                if percent_done_fn:
                    percent_done_fn(40)         # estimate 40% of work done on client side
                # NOTE(review): without an ostream nothing in this loop looks at the
                # pending list, so it only ends once the id becomes valid -- confirm
                # that is acceptable when incorporation fails on the server side.
                while not repo.valid_doc_id(doc_id):
                    if ostream:
                        pending = repo.list_pending(full=True)
                        s = _first(pending, lambda x: x['id'] == doc_id)
                        if not s:
                            break
                        dstatus = s['status']
                        if dstatus == 'error':
                            _rewrite_job_output(ostream, '{ state: 3, doc_id: "' + doc_id
                                                + '", msg: "' + urllib.quote(s['error']) + '"}')
                            break
                        if dstatus == 'unpacking':
                            msg = 'starting ripper process...'
                        elif dstatus == 'ripping':
                            msg = "ripping with ripper '" + s['ripper'] + "'..."
                        elif dstatus == 'moving':
                            msg = 'adding to registered document set...'
                        else:
                            # any other status used to leave msg unbound
                            msg = "processing (%s)..." % dstatus
                        _rewrite_job_output(ostream, '{ state: 1, doc_id: "' + doc_id
                                            + '", msg: "' + urllib.quote(msg) + '"}')
                    time.sleep(1.0)
                if percent_done_fn:
                    percent_done_fn(100)        # finished
                if repo.valid_doc_id(doc_id):
                    if bury:
                        # wait up to 100 seconds for it to show up in history list
                        # after that, wait another second, then bury it
                        counter = 100
                        while counter > 0:
                            h = [x.id for x in repo.history()]
                            if doc_id in h:
                                break
                            counter -= 1
                            time.sleep(1)
                        time.sleep(1)
                        repo.touch_doc(doc_id, bury=True, notify=False)
                        note(3, "buried %s", doc_id)
                    if wait:
                        if ostream:
                            _rewrite_job_output(ostream, '{ state: 2, doc_id: "' + doc_id + '"}')
                        elif noredir:
                            response.reply(doc_id, "text/plain")
                        else:
                            response.redirect("/action/basic/dv_show?doc_id=%s" % doc_id)
            else:
                note("cmd <<%s>> failed with status %s:\n%s", cmd, status, output)
                if wait:
                    if ostream:
                        _rewrite_job_output(ostream, '{ state: 3, msg: "' + urllib.quote('Error processing the document:\n' + output) + '"}')
                    else:
                        response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "<pre>" + htmlescape(output) + "</pre>")
        except:
            # deliberately broad: whatever went wrong is reported, not propagated
            e = ''.join(traceback.format_exception(*sys.exc_info()))
            if wait:
                note(3, "Exception processing uplib-add-document request:\n%s", htmlescape(e))
                if ostream:
                    _rewrite_job_output(ostream, '{state: 3, msg: "' + urllib.quote("Exception processing uplib-add-document request:\n" + e) + '"}')
                else:
                    response.error(HTTPCodes.INTERNAL_SERVER_ERROR,
                                   "Exception processing uplib-add-document request:\n<pre>" +
                                   htmlescape(e) + "\n</pre>")
            else:
                note("Exception processing uplib-add-document request:\n%s", e)
    finally:
        # the metadata file is normally gone already; this covers the error paths
        if mdtmpfile and os.path.exists(mdtmpfile):
            os.unlink(mdtmpfile)
        discard(tempf)
 def _add_vcards_file (repo, response, tfile):
     """Add each card found in the vCards file `tfile` to the repository.

     Every card is written out to a temporary file to calculate its
     fingerprint; cards whose fingerprint is already in the repository are
     skipped.  A card whose name matches an existing vCard document is added
     with a "version-of" metadata value naming that document.

     Parameters:
       repo -- the repository to query and to add the cards to
       response -- receives either a plain listing of "id:  name" lines for
           the cards added, or an INTERNAL_SERVER_ERROR carrying the traceback
       tfile -- path of the uploaded vCards file; it is removed before returning

     Returns None.
     """
     try:
         # NOTE(review): this stream is opened but never written in this
         # function; the result is sent with response.reply() below -- confirm
         # that the two calls are meant to be combined.
         fp = response.open("text/plain")
         conf = configurator.default_configurator()
         update_configuration(conf)
         tal = ensure_assembly_line(conf.get("assembly-line"))
         cards = []
         try:
             parsed = vCards.myformat(tfile)
             parsed['upload'] = False
             parsed['usepng'] = True
             for card in parsed.get('parsed-cards'):
                 # see if there's already a card for this name
                 # NOTE(review): the name goes into the query text unescaped
                 query = 'apparent-mime-type:"%s" AND vcard-name:"%s"' % (
                     vCard.format_mimetype, card.fn.value)
                 hits = repo.do_query(query)
                 # give each card its own options, so that a "version-of" set
                 # for one card cannot carry over to the cards that follow it
                 options = dict(parsed)
                 if hits:
                     card_metadata = dict(parsed.get('metadata') or {})
                     card_metadata['version-of'] = hits[0][1].id
                     options['metadata'] = card_metadata
                 p = vCard(card, options)
                 # calculate fingerprint; the temp file is needed only for that
                 fd, filename = tempfile.mkstemp()
                 try:
                     cardfp = os.fdopen(fd, "wb")
                     try:
                         p.write_to_file(cardfp)
                     finally:
                         cardfp.close()
                     fingerprint = calculate_originals_fingerprint(filename)
                 finally:
                     if os.path.exists(filename):
                         os.unlink(filename)
                 # look up fingerprint in repo to see if we already have it
                 hits = repo.do_query('sha-hash:%s' % fingerprint)
                 if hits:
                     # already there, so skip this one
                     note(3, "skipping '%s', already in repo...", card.fn.value)
                     continue
                 # new card, so add it
                 pinst = p.process()
                 if isinstance(pinst, DocumentParser):
                     try:
                         folder = repo.create_document_folder(repo.pending_folder())
                         doc_id = os.path.basename(folder)
                         note("using id %s for %s...", doc_id, card.fn.value)
                         # add the tfolder to the repository
                         process_folder(repo, doc_id, pinst.folder, True)
                         flesh_out_folder(doc_id, None, None, repo, None, None)
                         note("added card for %s\n" % card.fn.value)
                         cards.append((doc_id, card.fn.value))
                     except:
                         # one bad card is logged and must not stop the others
                         msg = "Exception processing vCard; vCard is\n%s\nException was\n%s\n" % (
                             card, ''.join(traceback.format_exception(*sys.exc_info())))
                         note(0, msg)
         finally:
             if tal:
                 # remove the assembly line only when ensure_assembly_line() said so
                 from uplib.addDocument import AssemblyLine
                 shutil.rmtree(AssemblyLine)
             if os.path.exists(tfile):
                 os.unlink(tfile)
     except:
         msg = "Exception processing vcards:\n%s\n" % ''.join(traceback.format_exception(*sys.exc_info()))
         note(0, msg)
         response.error(HTTPCodes.INTERNAL_SERVER_ERROR, msg)
     else:
         response.reply('\n'.join(['%20s:  %s' % (x[0], x[1]) for x in cards]))
 def _add_icalendar_file (repo, response, tfile):
     """Add each VEVENT found in the iCalendar file `tfile` to the repository.

     Every event is written out to a temporary ".ics" file to calculate its
     fingerprint; events whose fingerprint is already in the repository are
     skipped.  An event whose uid matches an existing event document is added
     with a "version-of" metadata value naming that document.  Components
     other than VEVENT are only logged.

     Parameters:
       repo -- the repository to query and to add the events to
       response -- a "text/plain" stream is opened on it and gets one line
           per event skipped, added or failed; parse failures and unexpected
           exceptions are reported as INTERNAL_SERVER_ERROR
       tfile -- path of the uploaded iCalendar file; it is removed before
           returning

     Returns None.
     """
     try:
         conf = configurator.default_configurator()
         update_configuration(conf)
         tal = ensure_assembly_line(conf.get("assembly-line"))
         try:
             parsed = iCalendar.myformat(tfile)
             if not isinstance(parsed, dict):
                 note(0, "Can't parse supposed iCalendar file %s", tfile)
                 response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "Can't parse file")
                 return
             resp = response.open("text/plain")
             for event, name, uid in parsed.get('parsed-events'):
                 if hasattr(event, "dtstart"):
                     identifier = "%s @ %s" % (name, event.dtstart.value)
                 else:
                     identifier = name
                 if event.name != "VEVENT":
                     note(3, "No supported iCalendar subtype found in %s", identifier)
                     continue
                 # see if there's already a event for this name
                 # NOTE(review): the uid goes into the query text unescaped
                 query = 'apparent-mime-type:"%s" AND event-uid:"%s"' % (
                     iCalendarEventParser.format_mimetype, uid)
                 hits = repo.do_query(query)
                 # give each event its own metadata, so that a "version-of" set
                 # for one event cannot carry over to the events that follow it
                 event_metadata = dict(parsed.get("metadata") or {})
                 if hits:
                     event_metadata['version-of'] = hits[0][1].id
                 p = iCalendarEventParser(name,
                                          {"icsname": name,
                                           "icsuid": uid,
                                           "icsevent": event,
                                           "upload": False,
                                           "usepng": True,
                                           "metadata": event_metadata,
                                           })
                 # calculate fingerprint; the temp file is needed only for that
                 fd, filename = tempfile.mkstemp(".ics")
                 try:
                     fp = os.fdopen(fd, "wb")
                     try:
                         p.write_to_file(fp)
                     finally:
                         fp.close()
                     fingerprint = calculate_originals_fingerprint(filename)
                 finally:
                     if os.path.exists(filename):
                         os.unlink(filename)
                 # look up fingerprint in repo to see if we already have it
                 hits = repo.do_query('sha-hash:%s' % fingerprint)
                 if hits:
                     # already there, so skip this one
                     note(3, "skipping '%s', already in repo...", identifier)
                     resp.write("skipping '%s', already in repo\n" % identifier)
                     continue
                 # new event, so add it
                 p.metadata["sha-hash"] = fingerprint
                 pinst = p.process()
                 if isinstance(pinst, DocumentParser):
                     try:
                         folder = repo.create_document_folder(repo.pending_folder())
                         doc_id = os.path.basename(folder)
                         # add the tfolder to the repository
                         process_folder(repo, doc_id, pinst.folder, True)
                         flesh_out_folder(doc_id, None, None, repo, None, None)
                         resp.write("added event for %s\n" % identifier)
                     except:
                         # one bad event is reported and must not stop the others
                         msg = "Exception processing event; event is\n%s\nException was\n%s\n" % (
                             event, ''.join(traceback.format_exception(*sys.exc_info())))
                         note(0, msg)
                         resp.write(msg)
         finally:
             if tal:
                 # remove the assembly line only when ensure_assembly_line() said so
                 from uplib.addDocument import AssemblyLine
                 shutil.rmtree(AssemblyLine)
             if os.path.exists(tfile):
                 os.unlink(tfile)
     except:
         msg = "Exception processing iCalendar:\n%s\n" % ''.join(traceback.format_exception(*sys.exc_info()))
         note(0, msg)
         response.error(HTTPCodes.INTERNAL_SERVER_ERROR, msg)