def _common(self):

        global REPO_EXTENSION_DIRS, ALLOW_OLD_EXTENSIONS
        if not REPO_EXTENSION_DIRS:
            conf = configurator.default_configurator()
            REPO_EXTENSION_DIRS = PATH_SEPARATOR.join((
                os.path.join(self.repo.overhead_folder(), "extensions", "active"),
                os.path.join(conf.get('uplib-lib'), 'site-extensions')))
            ALLOW_OLD_EXTENSIONS = conf.get_bool("allow-old-extensions")

        module_name, function_name = self.angel_action[0]
        exception = None
        callable = None
        try:
            callable = find_action_function(module_name, function_name, self.repo.get_actions_path())
        except:
            t, v, b = sys.exc_info()
            exception = ''.join(traceback.format_exception(t, v, b))
            note(0, "find_action_function(%s/%s) raised an exception:\n%s", module_name, function_name, exception)
        if callable:
            field_values = request_to_field_dict(self.request) or {}
            try:
                resp = response(self, self.current_user is not None)
                callable(self.repo, resp, field_values)
                return True
            except ForkRequestInNewThread, x:
                note(4, "forked off request")
                self._auto_finish = False
                return False
            except:
                # assumed handler, mirroring the Medusa do_action() version
                # below: log the exception and signal it to the client
                excn_data = sys.exc_info()
                note(0, "signalling exception <%s>:", excn_data[1])
                signal_python_exception(self.request, excn_data)
                return True
def update_configuration():
    global JAVA, INDEXING_ADD_CMD, INDEXING_REMOVE_CMD, INDEXING_JAR, LUCENE_JAR, INDEXING_PROPERTIES, INDEXING_BATCHADD_CMD, DEBUG_FLAGS

    conf = configurator.default_configurator()
    props = conf.get("indexing-properties")

    if LUCENE == "java":

        JAVA = conf.get("java")
        LUCENE_JAR = conf.get("lucene-jarfile")
        INDEXING_JAR = conf.get("uplib-indexing-jarfile")
        INDEXING_ADD_CMD = conf.get("indexing-add-command")
        INDEXING_BATCHADD_CMD = conf.get("indexing-batch-add-command")
        INDEXING_REMOVE_CMD = conf.get("indexing-remove-command")
        if plibUtil._verbosity > 1:
            DEBUG_FLAGS = " -Dcom.parc.uplib.indexing.debugMode=true"
        else:
            DEBUG_FLAGS = ""

        if props:
            INDEXING_PROPERTIES = "\"-Dcom.parc.uplib.indexing.indexProperties=%s\"" % props
        else:
            INDEXING_PROPERTIES = ""

    elif LUCENE == 'jcc':

        import uplib.indexing
        uplib.indexing.initialize()
def get_default_rippers(repo):
    """Returns a default set of Ripper instances.

    :param repo: the repository instance
    :type repo: uplib.repository.Repository
    :return: the default set of rippers, in order
    :rtype: list(uplib.ripper.Ripper)
    """

    from uplib import createThumbnails, createHTML, createIndexEntry, createPageBboxes, paragraphs

    conf = configurator.default_configurator()

    default = [SimpleSummaryRipper(repo, int(repo.get_param("summary-length") or conf.get_int("summary-length") or 250)),
               paragraphs.ParagraphRipper(repo),
               createThumbnails.ThumbnailRipper(repo),
               createPageBboxes.BboxesRipper(repo),
               createHTML.HTMLRipper(repo),
               createIndexEntry.LuceneRipper(repo),
               ]

    if (sys.platform == "darwin") and conf.get_bool("install-finder-icon-ripper", True):
        from macstuff import MacRipper
        # insert the Mac ripper two from the end, just before the HTML and Lucene rippers
        default.insert(-2, MacRipper(repo))

    # if we have language support, add that ripper, too
    try:
        from uplib.language import GuessLanguageRipper
    except ImportError:
        pass
    else:
        default.insert(0, GuessLanguageRipper(repo))

    return default
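# A minimal sketch of a custom ripper that a site extension might append to
# this default chain.  The rip() signature and the contents.txt filename are
# assumptions based on the uplib.ripper.Ripper subclasses used above; check
# the base class before relying on them.

import os

from uplib.ripper import Ripper
from uplib.plibUtil import note

class WordCountRipper(Ripper):
    """Hypothetical ripper: log the word count of the document's text."""

    def rip(self, location, doc_id):
        # each document folder is assumed to carry its extracted text
        # in contents.txt; skip silently if it isn't there
        path = os.path.join(location, "contents.txt")
        if os.path.exists(path):
            fp = open(path, 'r')
            try:
                count = len(fp.read().split())
            finally:
                fp.close()
            note(3, "document %s has %d words", doc_id, count)

# usage: rippers = get_default_rippers(repo); rippers.append(WordCountRipper(repo))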
def after_repository_instantiation(repo):
    from uplib.plibUtil import note, configurator, uthread
    conf = configurator.default_configurator()
    rss_enabled = conf.get_bool("enable-rss-reader", True)
    if not rss_enabled:
        note("RSSReader:  explicitly disabled -- not initializing.")
        return
    start(repo)
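# To disable the reader, the corresponding entry in ~/.uplibrc (or
# site.config) would be, assuming the stock "name: value" config format:
#
#     enable-rss-reader: false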
def repo_properties (repo, response, params):
    """
    Return the properties of the repository.  These include values like
    `name`, `port`, `uplib-home`, `uplib-bin`, `uplib-lib`, `uplib-version`,
    `categories` (a comma-separated list of category names),
    `docs` (a comma-separated list of doc IDs), `collections` (a comma-separated list
    of collection IDs), `last-modified-time` (a timestamp with the last-modified
    time of the repository, as a floating point string giving seconds past the Unix epoch).

    :return: the repository properties specified above
    :rtype: either an XML-formatted data set, if "Accept: application/xml" is specified, \
            or a plain text list of properties, with one per line (lines can be very long)
    """
    d = {}
    d['name'] = repo.name()
    d['port'] = repo.port()
    conf = configurator.default_configurator()
    d['uplib-home'] = conf.get("uplib-home")
    d['uplib-bin'] = conf.get("uplib-bin")
    d['uplib-lib'] = conf.get("uplib-lib")
    d['uplib-version'] = conf.get("UPLIB_VERSION")
    c = repo.categories()
    c.sort(lambda x, y: cmp(string.lower(x), string.lower(y)))
    d['categories'] = ','.join(c)
    d['docs'] = ','.join([doc.id for doc in repo.generate_docs()])
    d['collections'] = ','.join([x.id for x in repo.list_collections()])
    d['last-modified-time'] = str(repo.mod_time())

    if response.xml_request or (params.get("format") == "xml"):

        retval = getDOMImplementation().createDocument(None, "repository", None)
        e = retval.createElement('properties')
        for element in d:
            e.setAttribute(element, str(d[element]))
        retval.documentElement.appendChild(e)
        fp = response.open("application/xml;charset=utf-8")
        fp.write(retval.toxml("UTF-8") + "\n")
        fp.close()
        return

    else:

        fp = response.open("text/plain")
        write_metadata(fp, d)
        fp.close()
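# A client-side sketch for the plain-text form of repo_properties.  It
# assumes this action is served under /action/basic/ (like repo_search
# elsewhere in this code) and that write_metadata() emits one
# "name: value" pair per line; both are assumptions to verify locally.
#
#     import urllib2
#     data = urllib2.urlopen("https://localhost:8080/action/basic/repo_properties").read()
#     props = {}
#     for line in data.splitlines():
#         if ': ' in line:
#             name, value = line.split(': ', 1)
#             props[name] = value
#     print props.get("uplib-version")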
    def __init__(self, filesystem, repository):

        default_handler.default_handler.__init__(self, filesystem)
        self.__repo__ = repository
        conf = configurator.default_configurator()
        if conf.get('no-caching'):
            self.allow_cache = False
        else:
            self.allow_cache = True
        self.version = repository.get_version()
        self.version = (isinstance(self.version, unicode) and self.version.encode("ASCII", "replace")) or self.version
def after_repository_instantiation(repo):

    global CITATION_PARSER, HEADER_PARSER

    conf = configurator.default_configurator()
    CITATION_PARSER = conf.get("citeseer-citation-parser")
    HEADER_PARSER = conf.get("citeseer-header-parser")

    if CITATION_PARSER or HEADER_PARSER:
        rippers = repo.rippers()
        rippers.insert(-3, CiteSeerParserRipper(repo))
def update_configuration():
    global TIFFINFO, TIFFCP, TIFFSET, TAR, UNTAR_CMD, SUMMARY_LENGTH, CODETIMER_ON

    conf = configurator.default_configurator()

    TIFFINFO = conf.get("tiffinfo")
    TIFFCP = conf.get("tiffcp")
    TIFFSET = conf.get("tiffset")
    TAR = conf.get("tar")
    UNTAR_CMD = conf.get("untar-command")
    SUMMARY_LENGTH = conf.get_int("summary-length")
    CODETIMER_ON = conf.get_bool("codetimer-on", False)
def update_configuration():

    global TIFFSPLIT, TIFFCP, THUMBNAIL_TYPE, TIFF_SPLIT_CMD, NUMBERING_FONT, LEGEND_FONT, PREVIOUS_ICON, NEXT_ICON, MAX_SCALING_FACTOR
    global PAGEIMAGE_MAXWIDTH, PAGEIMAGE_MAXHEIGHT, TOP_ICON, CONSTANT_AREA_FACTOR, USE_VIRTUAL_INK, UNDER_CONSTRUCTION
    global AUTO_CROP_BIG_THUMBNAILS, DISTORT_VERY_SMALL_THUMBNAILS

    note(3, "in createThumbnails.update_configuration()")

    conf = configurator.default_configurator()

    TIFFSPLIT = conf.get("tiffsplit")
    TIFFCP = conf.get("tiffcp")
    TIFF_SPLIT_CMD = conf.get("tiff-split-command")
    THUMBNAIL_TYPE = conf.get("thumbnail-strategy", "log-area")
    NUMBERING_FONT = conf.get("numbering-font-file")
    LEGEND_FONT = conf.get("legend-font-file")
    previous_page_icon_file = conf.get("previous-page-icon-file")
    if not previous_page_icon_file:
      note(0, "No previous-page-icon-file parameter in site.config or .uplibrc")
      note(0, "Aborting update_configuration!")
      raise IOError("No previous-page-icon-file parameter in site.config or .uplibrc")
    try:
      PREVIOUS_ICON = Image.open(previous_page_icon_file)
    except IOError:
      note(0, "Could not load %s as an image." % previous_page_icon_file);
      note(0, "Aborting update_configuration!")
      raise IOError("Could not load %s." % previous_page_icon_file)
    next_page_icon_file = conf.get("next-page-icon-file")
    if not next_page_icon_file:
      note(0, "No next-page-icon-file parameter in site.config or .uplibrc")
      note(0, "Aborting update_configuration!")
      raise IOError("No next-page-icon-file parameter in site.config or .uplibrc")
    try:
      NEXT_ICON = Image.open(next_page_icon_file)
    except IOError:
      note(0, "Could not load %s as an image." % next_page_icon_file);
      note(0, "Aborting update_configuration!")
      raise IOError("Cound not load %s." % next_page_icon_file)
    temp = Image.open(conf.get("top-icon-file"))
    TOP_ICON = Image.new(temp.mode, temp.size, (255, 255, 255))
    TOP_ICON.paste(temp, (0, 0), temp)
    MAX_SCALING_FACTOR = float(conf.get("page-image-max-scaling-factor") or "0.33")
    PAGEIMAGE_MAXWIDTH = float(conf.get("page-image-max-width-pixels") or "680")
    PAGEIMAGE_MAXHEIGHT = float(conf.get("page-image-max-height-pixels") or "880")
    CONSTANT_AREA_FACTOR = float(conf.get("constant-area-factor") or "4.5")
    USE_VIRTUAL_INK = conf.get_bool("use-alpha-channel-thumbnails", false)
    AUTO_CROP_BIG_THUMBNAILS = conf.get_bool("auto-crop-big-thumbnails", true)
    DISTORT_VERY_SMALL_THUMBNAILS = conf.get_bool("keep-very-small-thumbnails", false)
    images_dir = os.path.join(conf.get("uplib-share"), "images")
def excluded_categories (repo):
    """Return a dict mapping category name strings to whether or not they
    match the "excluded-categories" RE.

    :param repo: the repository
    :type repo: uplib.repository.Repository
    :return: mapping of category names to whether they are excluded
    :rtype: dict
    """
    conf = configurator.default_configurator()
    excluded_pattern = re.compile(conf.get("excluded-categories") or "email/.*")
    categories = {}
    for category in repo.categories():
        categories[category] = (excluded_pattern.match(category) and True) or False
    return categories
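# For illustration, with the default "email/.*" pattern:
#
#     >>> excluded_categories(repo)    # hypothetical repo with two categories
#     {'email/inbox': True, 'projects/uplib': False}
#
# re.match anchors at the start of the string, so only category names under
# "email/" are marked as excluded.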
    def do_action (self, request, field_values, content):

        note(4, "in do_action (%s)", request.angel_action)

        global REPO_EXTENSION_DIRS, ALLOW_OLD_EXTENSIONS
        if not REPO_EXTENSION_DIRS:
            conf = configurator.default_configurator()
            REPO_EXTENSION_DIRS = PATH_SEPARATOR.join((
                os.path.join(self.__repo__.overhead_folder(), "extensions", "active"),
                os.path.join(conf.get('uplib-lib'), 'site-extensions')))
            ALLOW_OLD_EXTENSIONS = conf.get_bool("allow-old-extensions")

        module_name, function_name = request.angel_action[0]
        exception = None
        callable = None
        try:
            callable = find_action_function(module_name, function_name, self.__repo__.get_actions_path())
        except:
            t, v, b = sys.exc_info()
            exception = ''.join(traceback.format_exception(t, v, b))
            note(0, "find_action_function(%s/%s) raised an exception:\n%s", module_name, function_name, exception)
        if callable:
            try:
                self.action_counter.increment()
                if field_values is None: field_values = {}
                logged_in = is_logged_in(self.__repo__, request)
                resp = response(request, content, self.__repo__, logged_in)
                if module_name == 'basic' and function_name == 'repo_status_json':
                    # try not to log this call
                    request.log = _ignore_request_logging
                callable(self.__repo__, resp, field_values)
                return true
            except ForkRequestInNewThread, x:
                note(4, "forked off request")
                return false
            except Exception, x:
                note(0, "signalling exception <%s> at point 1a:", x)
                excn_data = sys.exc_info()
                signal_python_exception(request, excn_data)
#                 s2 = python_exception_html (excn_data, None)
#                 request.reply_code = 500
#                 request['Content-Type'] = 'text/html'
#                 request['Content-Length'] = len(s2)
#                 request.push(s2)
                return true
def matching_ids_and_filenames(repo, response, params):
    """Search for documents matching query=terms and return zero or more matches.

    Each line lists a matching document as:  doc_id ' ' title
    """
    # This is an abbreviated version of basicPlugins.py _repo_search().
    query = params['query']
    global INTERACTION_CHARSET
    if not INTERACTION_CHARSET:
        conf = configurator.default_configurator()
        INTERACTION_CHARSET = conf.get('interaction-charset', 'UTF-8')
    query = unicode(query, INTERACTION_CHARSET, 'replace')
    cutoff = 0.0
    coll = PrestoCollection(repo, None, query, None, None, cutoff)
    result = []
    for doc in coll.docs():
        title, mtype = doc_title_and_type(doc)
        result.append('%s %s' % (doc.id, title))

    response.reply('\n'.join(result), 'text/plain')
    def __init__(self, doc, options):
        global UPLIB_SHARE
        DocumentParser.__init__(self, doc, options)
        self.video = options.get("video")
        if not self.video:
            self.video = pyglet.media.load(doc)
            if not self.video.video_format:
                raise ValueError("Unknown video format encountered")
        self.size = self.get_video_size(self.video.video_format.width,
                                        self.video.video_format.height,
                                        self.video.video_format.sample_aspect)
        if (UPLIB_SHARE is None) or (self.NSAMPLES is None):
            c = configurator.default_configurator()
            UPLIB_SHARE = c.get("uplib-share")
            self.NSAMPLES = c.get_int("number-of-video-sample-frames", 5)
        duration = self.video.duration
        if duration:
            self.metadata['duration'] = str(duration)
        if have_hachoir:
            try:
                md = hachoir_metadata.extractMetadata(hachoir_parser.createParser(
                    unicode(doc), doc))
                d = {}
                for v in md:
                    if v.values:
                        d[v.key] = v.values[0].value
                v = d.get("last_modification")
                if v:
                    self.metadata['last-modified'] = v.isoformat('Z')
                    note("last-modified is %s", self.metadata['last-modified'])
                v = d.get("creation_date") or v
                if v:
                    self.metadata['date'] = v.strftime("%m/%d/%Y")
                mime_type = d.get("mime_type")
                if mime_type:
                    self.metadata['apparent-mime-type'] = mime_type
            except:
                pass
        # don't try to optimize away blank frames if we don't have many frames
        self.saveblanks = self.saveblanks or (self.NSAMPLES < 2)
def update_configuration():

    global CONTROLS_TEMPLATE_FILE, CONTROLS_TEMPLATE, CONTROLS_TEMPLATE_FILE_MODDATE, CONTROLS_HEIGHT
    global THUMBNAIL_COLWIDTH, USE_VIRTUAL_INK

    conf = configurator.default_configurator()
    template = conf.get("default-html-controls-template-file")
    if template: template = os.path.expanduser(template)
    if template and os.path.exists(template):
        moddate = os.path.getmtime(template)
    note(3, "default-html-controls-template-file is %s (was %s)", template, CONTROLS_TEMPLATE_FILE)
    if (template and os.path.exists(template) and
        (CONTROLS_TEMPLATE_FILE != template or CONTROLS_TEMPLATE_FILE_MODDATE < moddate)):
        note(3, "re-reading controls template file")
        fp = open(template, 'r')
        CONTROLS_TEMPLATE = fp.read()
        fp.close()
        CONTROLS_TEMPLATE_FILE = template
        CONTROLS_TEMPLATE_FILE_MODDATE = moddate
    CONTROLS_HEIGHT = conf.get_int('html-controls-panel-height') or 200
    THUMBNAIL_COLWIDTH = conf.get_int('html-thumbnails-column-width') or 130
    USE_VIRTUAL_INK = conf.get_bool("use-alpha-channel-thumbnails") or false
    plib_path = conf.get("plib-path")
def doc_categorize (repo, response, params):

    from uplib.basicPlugins import show_abstract, _is_sensible_browser
    from uplib.basicPlugins import show_title, STANDARD_BACKGROUND_COLOR, STANDARD_TOOLS_COLOR, STANDARD_LEGEND_COLOR
    from uplib.basicPlugins import __issue_javascript_head_boilerplate as issue_javascript_head_boilerplate
    from uplib.basicPlugins import __issue_menu_definition as issue_menu_definition
    from uplib.basicPlugins import __issue_title_styles as issue_title_styles

    global _CONFIGURATION
    if _CONFIGURATION is None:
        _CONFIGURATION = { "exclusions": [
            re.compile(x.strip()) for x in configurator.default_configurator().get("categorize-excluded-categories", "").split(",") if x.strip()]}

    def figure_size(count, avgsize):
        if avgsize < 0.0001:
            return 0.0001
        return math.sqrt(math.log((count * (math.e - 1))/avgsize + 1))
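    # A worked check of figure_size: when count == avgsize, the log()
    # argument is (avgsize * (e - 1)) / avgsize + 1 == e, so the result is
    # sqrt(log(e)) == 1.0 -- an average-sized category renders at 1em.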

    doc_id = params.get("doc_id")
    if not doc_id:
        response.error(HTTPCodes.BAD_REQUEST, "No doc_id parameter specified.")
        return
    doc = repo.valid_doc_id(doc_id) and repo.get_document(doc_id)
    if not doc:
        response.error(HTTPCodes.BAD_REQUEST, "Invalid doc_id parameter '%s' specified." % doc_id)
        return
    fp = response.open()
    title = (doc.get_metadata("title") or doc.id).encode("UTF-8", "strict")
    fp.write("<head><title>Categorizing '%s'</title>\n" % htmlescape(title))
    fp.write('<meta http-equiv="Content-Script-Type" content="text/javascript">\n')
    fp.write('<link REL="SHORTCUT ICON" HREF="/favicon.ico">\n')
    fp.write('<link REL="ICON" type="image/ico" HREF="/favicon.ico">\n')
    issue_javascript_head_boilerplate(fp)
    issue_title_styles(fp)
    fp.write('</head><body bgcolor="%s">\n' % STANDARD_BACKGROUND_COLOR)
    issue_menu_definition(fp)
    show_abstract(repo, doc, fp, _is_sensible_browser(response.user_agent), showpagesearch=False)
    fp.write("<hr />\n")
    doccats = [x.lower() for x in doc.get_category_strings()]
    for cat in doccats[:]:
        if cat.find('/') >= 0:
            parts = cat.split('/')
            for i in range(1, len(parts)):
                doccats.append('/'.join(parts[:i]))
    tags = find_likely_tags(doc)
    if tags:
        # try to remove duplicates
        stags = min(10, len(tags))
#         tagnames = [tag[0].split('/')[0] for tag in tags[:stags] if tag[0].find('/') >= 0]
#         count = 0
#         i = 0
#         while tagnames and (i < stags):
#             if tags[i][0] in tagnames:
#                 del tags[i]
#                 stags = min(10, len(tags))
#                 tagnames = [tag[0].split('/')[0] for tag in tags[:stags] if tag[0].find('/') >= 0]
#             else:
#                 i += 1

        fp.write("<center><small><i>Likely categories</i></small><br />")
        count = 0
        topscore = _adjust_score(*tags[0][1][:2])
        exclusions = _CONFIGURATION and _CONFIGURATION.get("exclusions")
        for name, (score, ndocs, ascore) in tags:

            if count > stags:
                break

            skip = False
            for exclusion in exclusions:
                if exclusion.match(name.lower()):
                    skip = True
                    break
            if skip:
                continue

            if count > 0:
                fp.write(" &middot; ")
            #size = max(0.5, (2/topscore) * ascore)
            size = 1
            color = (name.lower() in doccats) and "red" or "black"
            action = '/'.join(response.request_path.split('/')[:3]) + '/doc_%s_category?doc_id=%s&tag=%s' % (
                (name.lower() in doccats) and "remove" or "add", doc.id, urllib.quote_plus(name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category (score=%.3f)">%s</a>' % (
                size, color, action,
                (name.lower() in doccats) and "remove" or "add",
                htmlescape(name), ascore, htmlescape(name)))
            count += 1
        fp.write("</center></p><hr />\n")
    fp.write('<form action="%s" method=get><center>Add a new category to this document: ' %
             ('/'.join(response.request_path.split('/')[:3]) + '/doc_add_category'))
    fp.write('<input type=hidden name="doc_id" value="%s">\n' % doc.id)
    fp.write('<input type=text name="tag" value="" size=40></form></center>\n')
    note(4, "doc_categorize:  retrieving repository categories... (%s)", time.ctime())
    cats = repo.get_categories_with_docs()
    note(4, "doc_categorize:  have categories (%s)", time.ctime())
    if cats:
        fp.write("<hr>\n<center><small><i>All categories</i></small><br />")
        avgsize = sum([len(x) for x in cats.values()]) / float(len(cats))
        catkeys = cats.keys()
        catkeys.sort(lambda x, y: cmp(x.lower(), y.lower()))
        first = True
        exclusions = _CONFIGURATION and _CONFIGURATION.get("exclusions")
        for name in catkeys:
            skip = False
            for exclusion in exclusions:
                if exclusion.match(name.lower()):
                    skip = True
                    break
            if skip:
                continue

            if not first:
                fp.write(" &middot; ")
            else:
                first = False
            size = max(0.5, figure_size(len(cats[name]), avgsize))
            color = (name.lower() in doccats) and "red" or "black"
            action = '/'.join(response.request_path.split('/')[:3]) + '/doc_%s_category?doc_id=%s&tag=%s' % (
                (name.lower() in doccats) and "remove" or "add", doc.id, urllib.quote_plus(name))
            actionsee = '/action/basic/repo_search?query=%s' % (
                urllib.quote_plus('categories:"%s"' % name))
            fp.write('<a style="font-size: %fem; color: %s;" href="%s" title="%s the \'%s\' category">%s</a>' % (
                size, color, action,
                (name.lower() in doccats) and "remove" or "add",
                htmlescape(name), htmlescape(name)))
            fp.write('<a style="font-size: %fem; color: %s; vertical-align: super;" href="%s" ' % (
                max(0.4, size/2), STANDARD_LEGEND_COLOR, actionsee) +
                     'title="see the %s document%s in the \'%s\' category" target="_blank">%d</a>' % (
                         (len(cats[name]) == 1) and "one" or str(len(cats[name])),
                         (len(cats[name]) != 1) and "s" or "", htmlescape(name), len(cats[name])))
                     
    fp.write("</body>\n")
def upload_document (repository, response, fields):

    """Upload a complete UpLib document folder to the repository.  Used by `uplib-add-document`.

    :Parameters:
        newfile
          the folder, as a zip file or tar file.  Using a tar file is considered obsolete.
        filetype
          must be either 'tarred-folder' or 'zipped-folder'
        format
          the format of the response to send back.  If 'xml', an XML document will be generated,
          containing a single element called 'id' whose text content is the document ID of the
          new document; the default is to send back simply the new doc ID as a plain text string.
        title
          optionally, the title of the document
        id
          optionally, a pre-assigned doc ID to use.  If there is no folder with this ID in the
          `pending` directory, this will raise an error.
        authors
          optionally, a list of authors, each name separated from the next with the string " and ".
        source
          optionally, a string describing the source of the document.
        date
          optionally, an UpLib-format date string `[[DD/]MM/]YYYY`
        keywords
          optionally, a comma-separated list of keywords to associate with the document
        categories
          optionally, a comma-separated list of categories (tags) to associate with the document
        abstract
          optionally, an abstract for the document
        citation
          optionally, a citation in some citation format for the document
        comment
          optionally, some text giving a comment on the document
    :return: the document ID for the new document
    :rtype: plain text string, or if XML is specified, an XML ``result`` element containing an ``id`` node with the ID as its text
    """

    global INTERACTION_CHARSET

    def possibly_set (db, fields, valuename, unfold_lines=false):
        if fields.has_key(valuename):
            if unfold_lines:
                value = string.replace(string.replace(fields[valuename], '\n', ' '), '\r', ' ')
            else:
                value = fields[valuename]
            value = unicode(value, INTERACTION_CHARSET, "replace")
            db[valuename] = value

    if not INTERACTION_CHARSET:
        conf = configurator.default_configurator()
        INTERACTION_CHARSET = conf.get('interaction-charset', 'UTF-8')

    if (not fields.has_key('newfile')) or (not fields.has_key('filetype')):
        response.error(HTTPCodes.BAD_REQUEST, "Badly formed upload request.\n")
        return

    known_content_types = repository.content_types()

    msgtag = ""
    try:

        doc_bits = fields['newfile']
        doc_type = fields['filetype']

        if not doc_type in known_content_types:
            response.error(HTTPCodes.UNSUPPORTED_MEDIA_TYPE, "Can't upload files of type '%s'.\n" % doc_type)
            return

        metadata = {}
        possibly_set(metadata, fields, "title")
        possibly_set(metadata, fields, "id")
        possibly_set(metadata, fields, "authors")
        possibly_set(metadata, fields, "source")
        possibly_set(metadata, fields, "date")
        possibly_set(metadata, fields, "keywords")
        possibly_set(metadata, fields, "categories")
        possibly_set(metadata, fields, "abstract", true)
        possibly_set(metadata, fields, "citation", true)
        possibly_set(metadata, fields, "comment", true)
        possibly_set(metadata, fields, "name")

        note(2, "Adding new document; len(bits) = %d, type='%s'", len(doc_bits), doc_type)
        id = repository.create_new_document(doc_bits, doc_type, metadata)

        # update the global list of categories
        categories_value = fields.has_key('categories') and fields['categories']
        cleaned_categories = (categories_value and
                              map(lambda x: string.strip(x), string.split(categories_value, ','))) or []
        db_categories = repository.categories()
        for category in cleaned_categories:
            if not category in db_categories:
                repository.add_category(category)

        if response.xml_request or (fields.get("format") == "xml"):

            retval = getDOMImplementation().createDocument(None, "result", None)
            e = retval.createElement('id')
            e.appendChild(retval.createTextNode(id))
            retval.documentElement.appendChild(e)
            fp = response.open("application/xml;charset=utf-8")
            fp.write(retval.toxml("UTF-8") + "\n")
            fp.close()
            return

        else:

            fp = response.open("text/plain")
            fp.write(id)
            fp.close()
            return

    except:

        typ, ex, tb = sys.exc_info()
        raise ex, None, tb
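# A client-side sketch of posting a zipped folder to this action.  The field
# names follow the handler above; the /action/UploadDocument/... URL and the
# absence of certificate handling are assumptions to adapt locally.
#
#     import urllib2, uuid
#
#     boundary = uuid.uuid4().hex
#     def part(name, value, filename=None):
#         disp = 'form-data; name="%s"' % name
#         if filename:
#             disp += '; filename="%s"' % filename
#         return '--%s\r\nContent-Disposition: %s\r\n\r\n%s\r\n' % (boundary, disp, value)
#
#     body = (part("filetype", "zipped-folder") +
#             part("title", "My document") +
#             part("newfile", open("folder.zip", "rb").read(), "folder.zip") +
#             '--%s--\r\n' % boundary)
#     req = urllib2.Request("https://localhost:8080/action/UploadDocument/upload_document", body)
#     req.add_header("Content-Type", "multipart/form-data; boundary=" + boundary)
#     print urllib2.urlopen(req).read()     # the new document's ID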
def _add_icalendar_file (repo, response, tfile):
    try:
        conf = configurator.default_configurator()
        update_configuration(conf)
        tal = ensure_assembly_line(conf.get("assembly-line"))
        try:
            parsed = iCalendar.myformat(tfile)
            if not isinstance(parsed, dict):
                note(0, "Can't parse supposed iCalendar file %s", tfile)
                response.error(HTTPCodes.INTERNAL_SERVER_ERROR, "Can't parse file")
                return
            resp = response.open("text/plain")
            for event, name, uid in parsed.get('parsed-events'):
                if hasattr(event, "dtstart"):
                    identifier = "%s @ %s" % (name, event.dtstart.value)
                else:
                    identifier = name
                # see if there's already an event for this name
                query = 'apparent-mime-type:"%s" AND event-uid:"%s"' % (
                    iCalendarEventParser.format_mimetype, uid)
                hits = repo.do_query(query)
                if hits:
                    if 'metadata' not in parsed:
                        parsed['metadata'] = {}
                    parsed['metadata']['version-of'] = hits[0][1].id
                if event.name == "VEVENT":
                    p = iCalendarEventParser(name, {
                        "icsname": name,
                        "icsuid": uid,
                        "icsevent": event,
                        "upload": False,
                        "usepng": True,
                        "metadata": parsed.get("metadata") or {},
                        })
                else:
                    note(3, "No supported iCalendar subtype found in %s", identifier)
                    p = None
                if p:
                    # calculate fingerprint
                    fd, filename = tempfile.mkstemp(".ics")
                    fp = os.fdopen(fd, "wb")
                    p.write_to_file(fp)
                    fp.close()
                    fingerprint = calculate_originals_fingerprint(filename)
                    # look up fingerprint in repo to see if we already have it
                    hits = repo.do_query('sha-hash:%s' % fingerprint)
                    if hits:
                        # already there, so skip this one
                        note(3, "skipping '%s', already in repo...", identifier)
                        resp.write("skipping '%s', already in repo\n" % identifier)
                        continue
                    # new event, so add it
                    p.metadata["sha-hash"] = fingerprint
                    pinst = p.process()
                    if isinstance(pinst, DocumentParser):
                        try:
                            folder = repo.create_document_folder(repo.pending_folder())
                            id = os.path.basename(folder)
                            # add the tfolder to the repository
                            process_folder(repo, id, pinst.folder, True)
                            flesh_out_folder(id, None, None, repo, None, None)
                            resp.write("added event for %s\n" % identifier)
                        except:
                            msg = "Exception processing event; event is\n%s\nException was\n%s\n" % (
                                event, ''.join(traceback.format_exception(*sys.exc_info())))
                            note(0, msg)
                            resp.write(msg)
        finally:
            if tal:
                from uplib.addDocument import AssemblyLine
                shutil.rmtree(AssemblyLine)
            if os.path.exists(tfile):
                os.unlink(tfile)
    except:
        msg = "Exception processing iCalendar:\n%s\n" % ''.join(traceback.format_exception(*sys.exc_info()))
        note(0, msg)
        response.error(HTTPCodes.INTERNAL_SERVER_ERROR, msg)
def _scan_rss_sites(repo):

    global _ADDED_SITES, _REMOVED_SITES

    try:
        from uplib.plibUtil import configurator, note, write_metadata, id_to_time, create_new_id
        from uplib.extensions import find_and_load_extension
        conf = configurator.default_configurator()

        if repo:
            sys_inits_path = os.path.join(conf.get('uplib-lib'), 'site-extensions')
            repo_inits_path = os.path.join(repo.root(), "overhead", "extensions", "active")
            upload_m = find_and_load_extension("UploadDocument", "%s|%s" % (repo_inits_path, sys_inits_path), None, True)
            if not upload_m:
                note(0, "Can't load UploadDocument extension!")
                sys.exit(1)
            else:
                note("UploadDocument extension is %s", upload_m)

        scan_period = conf.get_int("rss-scan-period", 60 * 2)
        startup_delay = conf.get_int("rss-startup-delay", 0)
        del conf

        import feedparser

        if startup_delay > 0:
            note(3, "startup delay is %d", startup_delay)
            time.sleep(startup_delay)

    except:
        note(0, "RSSReader:  exception starting RSS scan thread:\n%s",
             ''.join(traceback.format_exception(*sys.exc_info())))
        return

    rss_sites = -1
    while True:
        try:
            conf = configurator()       # re-read uplibrc file
            old_rss_sites = rss_sites
            rss_sites = conf.get("rss-sites")
            if old_rss_sites == -1 or (old_rss_sites != rss_sites):
                note(2, "rss_sites are %s", rss_sites)
            scan_period = conf.get_int("rss-scan-period", scan_period)
            expiration_period = conf.get_int("rss-expiration-period", 30 * 24 * 60 * 60)        # 30 days
            if rss_sites:
                rss_sites = rss_sites.split() + _ADDED_SITES
            else:
                rss_sites = _ADDED_SITES[:]
            if rss_sites:
                for site in _REMOVED_SITES:
                    if site in rss_sites:
                        rss_sites.remove(site)
            if rss_sites:
                feeds = []
                for site in rss_sites:
                    if site.startswith("feed:"):
                        feeds.append(feedparser.parse(site))
                    elif site.startswith("http:") or site.startswith("https:"):
                        feeds += find_feeds(site)
                note("feeds are:\n%s", [(x.feed.title, x.href, len(x.entries)) for x in feeds])
                for feed in feeds:
                    note("RSSReader:  %s: %s entries in feed %s", time.ctime(), len(feed.entries), feed.feed.title)
                    for entry in feed.entries:
                        d = process_entry(entry)
                        if not d:
                            continue
                        id = d.get("rss-id")
                        hits = repo.do_query('+rss-id:"%s"' % id)
                        if hits:
                            # already in repo
                            continue
                        if repo:
                            response = FakeResponse(repo)
                            mdoutput = StringIO.StringIO()
                            write_metadata(mdoutput, d)
                            md = mdoutput.getvalue()
                            mdoutput.close()
                            upload_m.add(repo, response, { 'URL': d.get("original-url"),
                                                           'wait': "true",
                                                           'no-redirect': "true",
                                                           'metadata': md,
                                                           'md-categories': "RSSReader/%s" % feed.feed.title,
                                                           })
                            if response.thread:
                                while response.thread.isAlive():
                                    response.thread.join(1.0)
                            note("RSSReader:  %s:  %s (%s: %s)", time.ctime(), repr(d.get("title")), response.code, response.message)
                        else:
                            note("RSSReader:  %s:  %s (%s)\n    %s", time.ctime(), repr(d.get("title")), d.get("date"), d.get("summary"))
            # now do expiries
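            # document IDs encode their creation time (see create_new_id and
            # id_to_time above), so the id:[... TO old_id] range below matches
            # only entries created before the expiration cutoff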
            old_id = create_new_id(time.time() - expiration_period)[:-5]
            hits = repo.do_query("categories:RSSReader AND id:[00000-00-0000-000 TO %s] AND NOT categories:RSSReader/_noexpire_" % old_id)
            for score, doc in hits:
                # check to see if the user has looked at it
                if os.path.exists(os.path.join(doc.folder(), "activity")):
                    doc.add_category("RSSReader/_noexpire_", True)
                # and if not, remove it
                else:
                    repo.delete_document(doc.id)
            time.sleep(scan_period)
        except KeyboardInterrupt:
            if _IGNORE_KEYBOARD_INTERRUPTS:
                note(0, "RSSReader:  %s", ''.join(traceback.format_exception(*sys.exc_info())))
            else:
                sys.exit(0)                
        except:
            note(0, "RSSReader:  %s", ''.join(traceback.format_exception(*sys.exc_info())))
def _add_vcards_file (repo, response, tfile):
    try:
        fp = response.open("text/plain")
        conf = configurator.default_configurator()
        update_configuration(conf)
        tal = ensure_assembly_line(conf.get("assembly-line"))
        cards = []
        try:
            parsed = vCards.myformat(tfile)
            parsed['upload'] = False
            parsed['usepng'] = True
            for card in parsed.get('parsed-cards'):
                # see if there's already a card for this name
                query = 'apparent-mime-type:"%s" AND vcard-name:"%s"' % (
                    vCard.format_mimetype, card.fn.value)
                hits = repo.do_query(query)
                if hits:
                    if 'metadata' not in parsed:
                        parsed['metadata'] = {}
                    parsed['metadata']['version-of'] = hits[0][1].id
                p = vCard(card, parsed)
                # calculate fingerprint
                fd, filename = tempfile.mkstemp()
                fp = os.fdopen(fd, "wb")
                p.write_to_file(fp)
                fp.close()
                fingerprint = calculate_originals_fingerprint(filename)
                # look up fingerprint in repo to see if we already have it
                hits = repo.do_query('sha-hash:%s' % fingerprint)
                if hits:
                    # already there, so skip this one
                    note(3, "skipping '%s', already in repo...", card.fn.value)
                    continue
                # new card, so add it
                pinst = p.process()
                if isinstance(pinst, DocumentParser):
                    try:
                        folder = repo.create_document_folder(repo.pending_folder())
                        id = os.path.basename(folder)
                        note("using id %s for %s...", id, card.fn.value)
                        # add the tfolder to the repository
                        process_folder(repo, id, pinst.folder, True)
                        flesh_out_folder(id, None, None, repo, None, None)
                        note("added card for %s\n" % card.fn.value)
                        cards.append((id, card.fn.value))
                    except:
                        msg = "Exception processing vCard; vCard is\n%s\nException was\n%s\n" % (
                            card, ''.join(traceback.format_exception(*sys.exc_info())))
                        note(0, msg)
        finally:
            if tal:
                from uplib.addDocument import AssemblyLine
                shutil.rmtree(AssemblyLine)
            if os.path.exists(tfile):
                os.unlink(tfile)
    except:
        msg = "Exception processing vcards:\n%s\n" % ''.join(traceback.format_exception(*sys.exc_info()))
        note(0, msg)
        response.error(HTTPCodes.INTERNAL_SERVER_ERROR, msg)
    else:
        response.reply('\n'.join(['%20s:  %s' % (x[0], x[1]) for x in cards]))
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

#
# papers over differences between Medusa and Tornado and any other server framework
# we may use in the future, like Twisted
#

import sys, os, re

from uplib.plibUtil import configurator

conf = configurator.default_configurator()
service_framework = conf.get("service-framework")
if service_framework == "Medusa":
    from uplib.angelHandler import ForkRequestInNewThread, run_fn_in_new_thread
    from uplib.startAngel import darwin_launchd, daemon, unix_mainloop, start_angel

    def set_top_level_action(handler):
        if not isinstance(handler, tuple) or (len(handler) != 2):
            raise RuntimeError("toplevel handler must be tuple of ('MODULENAME', 'FUNCTIONNAME')")
        import uplib.angelHandler
        setattr(uplib.angelHandler, "TOP_LEVEL_ACTION", handler)

elif service_framework == "Tornado":
    from uplib.tornadoHandler import ForkRequestInNewThread, run_fn_in_new_thread
    from uplib.startTornado import darwin_launchd, daemon, unix_mainloop, start_angel
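# A hedged usage sketch: under the Medusa framework, a site extension could
# route every request through its own handler like this (the module and
# function names here are hypothetical):
#
#     set_top_level_action(("myExtension", "handle_everything"))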