Ejemplo n.º 1
0
def devpiserver_pyramid_configure(config, pyramid_config):
    """Hook that wires devpi-web into the pyramid configuration.

    Validates the optional theme directory, publishes it together with the
    plugin manager and the search indexer in the pyramid registry, and
    applies a ``mimetypes`` workaround on Windows with Python 2.7.
    """
    theme_path = config.args.theme
    if theme_path:
        # the registry gets the absolute theme path; a missing path or a
        # path that is not a directory is logged and ends the process
        theme_path = os.path.abspath(theme_path)
        checks = (
            (os.path.exists, "The theme path '%s' does not exist."),
            (os.path.isdir, "The theme path '%s' is not a directory."),
        )
        for is_valid, complaint in checks:
            if not is_valid(theme_path):
                threadlog.error(complaint % theme_path)
                sys.exit(1)
    pyramid_config.registry['theme_path'] = theme_path

    # with include() the package name does not have to be given explicitly
    # for registrations of static views etc
    pyramid_config.include('devpi_web.main')
    plugin_manager = get_pluginmanager(config)
    pyramid_config.registry['devpiweb-pluginmanager'] = plugin_manager
    indexer = get_indexer(config)
    pyramid_config.registry['search_index'] = indexer

    needs_patch = (
        sys.platform == "win32" and sys.version_info[:2] == (2, 7))
    if not needs_patch:
        return

    # pyramid-1.5.1/webob choke on mimetypes.guess_type on windows with
    # python2.7, so the guessed type is passed through str()
    import mimetypes
    original_guess = mimetypes.guess_type

    def guess_type_str(url, strict=True):
        guessed = original_guess(url, strict)
        return str(guessed[0]), guessed[1]

    mimetypes.guess_type = guess_type_str
    threadlog.debug("monkeypatched mimetypes.guess_type to return bytes")
Ejemplo n.º 2
0
 def _update_projects(self, writer, projects, clear=False):
     """Add search documents for ``projects`` through ``writer``.

     For every project a main document of type "project" is added,
     followed by one document per available text field (with a boost)
     and, if the project carries an existing '+doczip', a "title" and a
     "page" document for every documentation page.

     :param writer: index writer the documents are added to
     :param projects: iterable of mappings with the project data
     :param clear: when false, everything already stored for the
         project's path is deleted before it is added again
     :returns: the number of documents that were added
     """
     add_document = partial(self._add_document, writer)
     # ``count`` starts at 0 and is advanced once after every added document
     counter = itertools.count()
     count = next(counter)
     main_keys = self.project_ix.schema.names()
     # (field name, boost) pairs for the per-field text documents
     text_keys = (('author', 0.5), ('author_email', 0.5),
                  ('description', 1.5), ('summary', 1.75), ('keywords',
                                                            1.75))
     for project in projects:
         data = dict((u(x), get_mutable_deepcopy(project[x]))
                     for x in main_keys if x in project)
         data['path'] = u"/{user}/{index}/{name}".format(**data)
         if not clear:
             # because we use hierarchical documents, we have to delete
             # everything we got for this path and index it again
             writer.delete_by_term('path', data['path'])
         data['type'] = "project"
         data['text'] = "%s %s" % (data['name'], project_name(data['name']))
         # all documents of one project are added within one writer group
         with writer.group():
             add_document(**data)
             count = next(counter)
             for key, boost in text_keys:
                 if key not in project:
                     continue
                 add_document(
                     **{
                         "path": data['path'],
                         "type": key,
                         "text": project[key],
                         "_text_boost": boost
                     })
                 count = next(counter)
             # the ``continue`` statements below leave the group and move
             # on to the next project
             if '+doczip' not in project:
                 continue
             if not project['+doczip'].exists():
                 log.debug("documentation not unpacked for %s",
                           data['path'])
                 continue
             for page in project['+doczip'].values():
                 if page is None:
                     continue
                 # every page yields two documents: one searchable by its
                 # title and one searchable by its text
                 add_document(
                     **{
                         "path": data['path'],
                         "type": "title",
                         "text": page['title'],
                         "text_path": page['path'],
                         "text_title": page['title']
                     })
                 count = next(counter)
                 add_document(
                     **{
                         "path": data['path'],
                         "type": "page",
                         "text": page['text'],
                         "text_path": page['path'],
                         "text_title": page['title']
                     })
                 count = next(counter)
     return count
Ejemplo n.º 3
0
def devpiserver_on_upload(stage, project, version, link):
    """Upload hook: index the project again when a doczip was uploaded.

    Uploads whose file does not exist are only logged.
    """
    if link.entry.file_exists():
        if link.rel == "doczip":
            index_project(stage, project)
        return
    # on replication or import we might be at a lower than current
    # revision and the file might have been deleted already
    threadlog.debug("ignoring lost upload: %s", link)
Ejemplo n.º 4
0
 def handler(self, is_from_mirror, serial, indexname, names):
     """Index the projects ``names`` of the index ``indexname``.

     All updates go through one writer which is committed when the whole
     batch succeeded and cancelled (with the exception re-raised) when
     anything failed.
     """
     log.debug(
         "Got %s projects from %s at serial %s for indexing",
         len(names), indexname, serial)
     indexer = get_indexer(self.xom)
     doc_counter = itertools.count()
     project_index = indexer.get_project_ix()
     schema_names = project_index.schema.names()
     index_writer = project_index.writer()
     index_searcher = project_index.searcher()
     try:
         with self.xom.keyfs.transaction(write=False) as tx:
             stage = self.xom.model.getstage(indexname)
             # without a stage there is nothing to iterate over
             pending = names if stage is not None else ()
             for name in pending:
                 info = ProjectIndexingInfo(stage=stage, name=name)
                 project_data = preprocess_project(info)
                 # the current transaction is used, so its serial is
                 # also the one used for indexing
                 indexer._update_project(
                     project_data, tx.at_serial, doc_counter, schema_names,
                     index_writer, searcher=index_searcher)
         total = next(doc_counter)
     except Exception:
         index_writer.cancel()
         # retries are left to the queue
         raise
     else:
         log.debug("Committing %s new documents to search index." % total)
         index_writer.commit()
Ejemplo n.º 5
0
 def query_from_xmlrpc(self, body):
     """Turn the body of an xmlrpc ``search`` request into a query string.

     :param body: raw xmlrpc request body
     :returns: the query parts for 'name' and 'summary' joined with the
         upper-cased operator
     :raises ValueError: for a method other than "search", an unknown
         operator, fields other than 'name'/'summary', or more than one
         value for a field
     """
     unmarshaller = Unmarshaller()
     parser = DefusedExpatParser(unmarshaller)
     parser.feed(body)
     parser.close()
     (data, method) = (unmarshaller.close(), unmarshaller.getmethodname())
     if method != "search":
         raise ValueError("Unknown method '%s'." % method)
     # the operator is optional and defaults to "and"
     if len(data) == 2:
         query, operator = data
     else:
         query = data
         operator = "and"
     log.debug("xmlrpc_search {0}".format((query, operator)))
     operator = operator.upper()
     if operator not in ('AND', 'OR', 'ANDNOT', 'ANDMAYBE', 'NOT'):
         raise ValueError("Unknown operator '%s'." % operator)
     if set(query.keys()).difference(['name', 'summary']):
         raise ValueError("Only 'name' and 'summary' allowed in query.")
     parts = []
     # maps the xmlrpc field name to the document type that is searched
     for key, field in (('name', 'project'), ('summary', 'summary')):
         value = query.get(key, [])
         if len(value) == 0:
             continue
         elif len(value) == 1:
             # double quotes are dropped so the value cannot end the
             # quoted phrase
             parts.append('(type:%s "%s")' %
                          (field, value[0].replace('"', '')))
         else:
             raise ValueError("Only one value allowed for query.")
     return (" %s " % operator).join(parts)
Ejemplo n.º 6
0
 def write_changelog_entry(self, serial, entry):
     """Insert the serialized changelog ``entry`` for ``serial`` and commit.

     :param serial: serial number stored in the ``serial`` column
     :param entry: object serialized with ``dumps`` into the ``data`` column
     """
     threadlog.debug("writing changelog for serial %s", serial)
     data = dumps(entry)
     c = self._sqlconn.cursor()
     try:
         c.execute("INSERT INTO changelog (serial, data) VALUES (%s, %s)",
                   (serial, pg8000.Binary(data)))
     finally:
         # close the cursor even when the insert fails
         c.close()
     self._sqlconn.commit()
Ejemplo n.º 7
0
def remove_docs(stage, project, version):
    """Delete the unpacked "+doc" directory of a project version.

    A directory that does not exist is only logged.
    """
    doc_dir = stage.keyfs.basedir.join(
        stage.user.name, stage.index, project, version, "+doc")
    directory = str(doc_dir)
    if os.path.isdir(directory):
        threadlog.debug("removing unpacked docs: %s" % directory)
        shutil.rmtree(directory)
        return
    threadlog.debug("ignoring lost unpacked docs: %s" % directory)
Ejemplo n.º 8
0
def remove_docs(stage, project, version):
    """Delete the unpacked documentation directory of a project version.

    Does nothing when ``stage`` is None; a directory that does not exist
    is only logged.
    """
    if stage is None:
        # the stage itself is already gone
        return
    unpack_path = get_unpack_path(stage, project, version)
    directory = unpack_path.strpath
    if os.path.isdir(directory):
        threadlog.debug("removing unpacked docs: %s" % directory)
        shutil.rmtree(directory)
        return
    threadlog.debug("ignoring lost unpacked docs: %s" % directory)
Ejemplo n.º 9
0
def unpack_docs(stage, name, version, entry):
    """Extract the archive behind ``entry`` and return the target path."""
    target = get_unpack_path(stage, name, version)
    # extraction is not done very carefully, but in principle the original
    # zip file is not lost anyway
    with entry.file_open_read() as fileobj, Archive(fileobj) as archive:
        archive.extract(target)
    threadlog.debug(
        "%s: unpacked %s-%s docs to %s", stage.name, name, version, target)
    return target
Ejemplo n.º 10
0
 def xmlrpc_search(self):
     """Answer an xmlrpc search request.

     Any exception is logged and reported to the client as a ``Fault``
     with code 1 instead of being propagated.
     """
     try:
         querystring = self.query_from_xmlrpc(self.request.body)
         log.debug("xmlrpc_search {0}".format(querystring))
         results = (self.search_index_packages(querystring), )
         payload = dumps(results, methodresponse=1, encoding='utf-8')
     except Exception as exc:
         log.exception("Error in xmlrpc_search")
         fault = Fault(1, repr(exc))
         payload = dumps(fault, encoding='utf-8')
     return Response(payload)
Ejemplo n.º 11
0
 def delete_projects(self, projects):
     """Delete the documents of ``projects`` from the index and commit."""
     writer = self.project_ix.writer()
     searcher = self.project_ix.searcher()
     # stays 0 for an empty iterable, otherwise the number of projects
     count = 0
     for count, project in enumerate(projects, start=1):
         doc_path = u"/%s/%s" % (project.indexname, project.name)
         writer.delete_by_term('path', doc_path, searcher=searcher)
     log.debug("Committing %s deletions to search index." % count)
     writer.commit()
     log.info("Finished committing %s deletions to search index." % count)
Ejemplo n.º 12
0
 def delete_projects(self, projects):
     """Delete the documents of ``projects`` from the index and commit."""
     writer = self.project_ix.writer()
     main_keys = self.project_ix.schema.names()
     # stays 0 for an empty iterable, otherwise the number of projects
     count = 0
     for count, project in enumerate(projects, start=1):
         # only the schema fields present in the project are used to
         # build the path
         data = {}
         for key in main_keys:
             if key in project:
                 field = u(key)
                 data[field] = project[key]
         data['path'] = u"/{user}/{index}/{name}".format(**data)
         writer.delete_by_term('path', data['path'])
     log.debug("Committing %s deletions to search index." % count)
     writer.commit()
     log.info("Finished committing %s deletions to search index." % count)
Ejemplo n.º 13
0
 def query_from_xmlrpc(self, body):
     """Parse the body of an xmlrpc ``search`` request.

     Returns a dict with the requested ``fields`` and the ``operator``;
     raises ValueError for any method other than "search".
     """
     unmarshaller = Unmarshaller()
     parser = DefusedExpatParser(unmarshaller)
     parser.feed(body)
     parser.close()
     params = unmarshaller.close()
     method = unmarshaller.getmethodname()
     if method != "search":
         raise ValueError("Unknown method '%s'." % method)
     # "and" is used unless the request carries exactly two values
     fields, operator = params, "and"
     if len(params) == 2:
         fields, operator = params
     log.debug("xmlrpc_search {0}".format((fields, operator)))
     return {"fields": fields, "operator": operator}
Ejemplo n.º 14
0
def devpiserver_on_changed_versiondata(stage, project, version, metadata):
    """Keep the search index in sync after version data has changed.

    Without metadata the project is either deleted from the index (when it
    is cached but no longer part of the stage) or indexed again from the
    metadata of its latest remaining version.
    """
    if stage is None:
        # TODO we don't have enough info to delete the project
        return
    if not metadata:
        project_gone = (
            is_project_cached(stage, project)
            and not stage.has_project_perstage(project))
        if project_gone:
            delete_project(stage, project)
            return
        versions = stage.list_versions(project)
        latest = get_latest_version(versions) if versions else None
        if latest:
            threadlog.debug("A version of %s was deleted, using latest version %s for indexing" % (
                project, latest))
            metadata = stage.get_versiondata(project, latest)
    if not metadata:
        return
    index_project(stage, metadata['name'])
Ejemplo n.º 15
0
 def queue_projects(self, projects, at_serial, searcher):
     """Queue ``projects`` for an index update.

     Projects from mirrors are skipped when the 'serial' stored for them
     in the index is not older than their last change; the remaining ones
     are collected per (indexname, at_serial) and handed to ``self.extend``
     in batches. All other projects are handed to ``self.add`` one by one.

     :param projects: iterable of project objects
     :param at_serial: serial used for the change lookup and for queuing
     :param searcher: searcher used to look up documents already stored
     """
     log.debug("Queuing projects for index update")
     # ``queued`` counts the projects that were not skipped
     queued_counter = itertools.count()
     queued = next(queued_counter)
     last_time = time.time()
     mirror_projects = {}
     processed = 0
     for processed, project in enumerate(projects, start=1):
         # progress is logged once more than 5 seconds have passed
         if time.time() - last_time > 5:
             last_time = time.time()
             log.debug(
                 "Processed a total of %s projects and queued %s so far. "
                 "Currently in %s" % (processed, queued, project.indexname))
         if project.is_from_mirror:
             # we find the last serial the project was changed to avoid re-indexing
             project_serial = project.stage.get_last_project_change_serial_perstage(
                 project.name, at_serial=at_serial)
             # mirrors have no docs, so we can shortcut
             path = '/%s/%s' % (project.indexname, project.name)
             existing = None
             doc_num = searcher.document_number(path=path)
             if doc_num is not None:
                 existing = searcher.stored_fields(doc_num)
             if existing:
                 existing_serial = existing.get('serial', -1)
                 if existing_serial >= project_serial:
                     # already indexed at this or a later serial; the
                     # project is not counted as queued
                     continue
             # we use at_serial here, because indexing is always done
             # with the latest metadata
             key = (project.indexname, at_serial)
             _projects = mirror_projects.setdefault(key, [])
             _projects.append(project)
             # hand over a full batch and start collecting again
             if len(_projects) >= self.QUEUE_MAX_NAMES:
                 self.extend(_projects, at_serial)
                 _projects.clear()
         else:
             # private projects need to be checked in IndexerThread.handler,
             # because preprocess_project might depend on files which were
             # not available when indexing while replicating like doczips
             self.add(project, at_serial)
         queued = next(queued_counter)
     # hand over whatever is left in the per-index batches
     for (indexname, serial), _projects in mirror_projects.items():
         self.extend(_projects, serial)
     log.info("Processed a total of %s projects and queued %s" %
              (processed, queued))
Ejemplo n.º 16
0
Archivo: main.py Proyecto: t-8ch/devpi
def devpiserver_pyramid_configure(config, pyramid_config):
    """Hook that wires devpi-web into the pyramid configuration.

    Includes ``devpi_web.main``, stores the search indexer in the pyramid
    registry and applies a ``mimetypes`` workaround on Windows with
    Python 2.7.
    """
    # with include() the package name does not have to be given explicitly
    # for registrations of static views etc
    pyramid_config.include('devpi_web.main')
    indexer = get_indexer(config)
    pyramid_config.registry['search_index'] = indexer

    needs_patch = (
        sys.platform == "win32" and sys.version_info[:2] == (2, 7))
    if not needs_patch:
        return

    # pyramid-1.5.1/webob choke on mimetypes.guess_type on windows with
    # python2.7, so the guessed type is passed through str()
    import mimetypes
    original_guess = mimetypes.guess_type

    def guess_type_str(url, strict=True):
        guessed = original_guess(url, strict)
        return str(guessed[0]), guessed[1]

    mimetypes.guess_type = guess_type_str
    threadlog.debug("monkeypatched mimetypes.guess_type to return bytes")
Ejemplo n.º 17
0
def render_description(stage, metadata):
    """Render the description in ``metadata`` and write it to a file.

    Nothing happens when the stage, description, name or version is
    missing. The description is rendered as rst first; when that yields
    None it is rendered as plain text, with any render warnings appended
    to the text.
    """
    desc = metadata.get("description")
    name = metadata.get("name")
    version = metadata.get("version")
    if any(item is None for item in (stage, desc, name, version)):
        return
    warning_stream = io.StringIO()
    html = readme_renderer.rst.render(desc, stream=warning_stream)
    warning_text = warning_stream.getvalue()
    if warning_text:
        desc = "%s\n\nRender warnings:\n%s" % (desc, warning_text)
    if html is None:
        html = readme_renderer.txt.render(desc)
    # the file is written in binary mode, so text is encoded first
    if py.builtin._istext(html):
        html = html.encode("utf8")
    target = get_description_file(stage, name, version)
    target.dirpath().ensure_dir()
    target.write(html, mode='wb')
    threadlog.debug("wrote description file: %s", target)
Ejemplo n.º 18
0
Archivo: main.py Proyecto: t-8ch/devpi
def devpiserver_on_changed_versiondata(stage, projectname, version, metadata):
    """Keep description file and search index in sync with version data.

    Without metadata the project is either deleted from the index (when
    the stage no longer knows its name) or handled with the metadata of
    its latest remaining version.
    """
    if stage is None:
        # TODO we don't have enough info to delete the project
        return
    if not metadata:
        if stage.get_projectname(projectname) is None:
            delete_project(stage, projectname)
            return
        versions = stage.list_versions(projectname)
        latest = get_latest_version(versions) if versions else None
        if latest:
            threadlog.debug(
                "A version of %s was deleted, using latest version %s for indexing"
                % (projectname, latest))
            metadata = stage.get_versiondata(projectname, latest)
    if not metadata:
        return
    render_description(stage, metadata)
    index_project(stage, metadata['name'])
Ejemplo n.º 19
0
 def _querystring(self, searchinfo):
     """Build the query string for the ``fields``/``operator`` search info.

     Raises ValueError for an unknown operator, for fields other than
     'name' and 'summary', and for more than one value per field.
     """
     allowed_operators = ('AND', 'OR', 'ANDNOT', 'ANDMAYBE', 'NOT')
     fields = searchinfo['fields']
     operator = searchinfo['operator'].upper()
     if operator not in allowed_operators:
         raise ValueError("Unknown operator '%s'." % operator)
     unexpected = set(fields.keys()).difference(['name', 'summary'])
     if unexpected:
         raise ValueError("Only 'name' and 'summary' allowed in query.")
     parts = []
     # maps the request field name to the document type that is searched
     for key, doc_type in (('name', 'project'), ('summary', 'summary')):
         values = fields.get(key, [])
         n_values = len(values)
         if n_values == 0:
             continue
         if n_values != 1:
             raise ValueError("Only one value allowed for query.")
         term = values[0].replace('"', '')
         parts.append('(type:%s "%s")' % (doc_type, term))
     separator = " %s " % operator
     querystring = separator.join(parts)
     log.debug("_querystring {0}".format(querystring))
     return querystring
Ejemplo n.º 20
0
def iter_projects(xom):
    """Yield the preprocessed data of every project of every index.

    Progress is logged when more than 3 seconds have passed since the
    last progress message.
    """
    last_report = time.time()
    for user in xom.model.get_userlist():
        username = ensure_unicode(user.name)
        indexes = user.get(user).get('indexes', {})
        for index, _index_info in indexes.items():
            stage = xom.model.getstage(username, ensure_unicode(index))
            if stage is None:
                # this is async, so the stage may be gone
                continue
            log.info("Search-Indexing %s:", stage.name)
            project_names = stage.list_projects_perstage()
            for count, name in enumerate(project_names, start=1):
                project_name = ensure_unicode(name)
                now = time.time()
                if now - last_report > 3:
                    log.debug("currently search-indexed %s", count)
                    last_report = now
                yield preprocess_project(stage, project_name)
Ejemplo n.º 21
0
def unpack_docs(stage, name, version, entry):
    """Extract the archive behind ``entry`` and return the target path.

    A ``.hash`` file inside the target records the hash spec of the
    extracted archive; when it matches ``entry.hash_spec`` nothing is
    extracted again.
    """
    target = get_unpack_path(stage, name, version)
    marker = target.join('.hash')
    if marker.exists():
        with marker.open() as fileobj:
            recorded = fileobj.read().strip()
        if recorded == entry.hash_spec:
            return target
    # anything left from a previous extraction is removed first
    if target.exists():
        target.remove()
    # extraction is not done very carefully, but in principle the original
    # zip file is not lost anyway
    with entry.file_open_read() as fileobj, Archive(fileobj) as archive:
        archive.extract(target)
    with marker.open('w') as fileobj:
        fileobj.write(entry.hash_spec)
    threadlog.debug(
        "%s: unpacked %s-%s docs to %s", stage.name, name, version, target)
    return target
Ejemplo n.º 22
0
def devpiserver_on_upload(stage, project, version, link):
    """ called when a file is uploaded to a private stage for
    a projectname/version.  link.entry.file_exists() may be false because
    a more recent revision deleted the file (and files are not revisioned).
    NOTE that this hook is currently NOT called for the implicit "caching"
    uploads to the pypi mirror.

    If the uploaded file is a wheel and is the latest version on this index,
    store its metadata in json file at the root of index/+f/ directory.
    With the standard config with nginx, nginx will directly serve this file.

    The metadata is written to ``<project_dir>/<project>-<version>.json``
    and ``<project_dir>.json`` is (re)created as a symlink to that file.
    """
    entry = link.entry
    if not (entry and entry.file_exists()
            and entry.basename.endswith('.whl')):
        return
    threadlog.info("Wheel detected: %s", entry.basename)
    new_version = parse_version(version)
    latest_version = parse_version(
        stage.get_latest_version_perstage(project))
    if latest_version > new_version:
        threadlog.debug(
            "A newer release has already been uploaded: %s - nothing to do",
            latest_version)
        return
    metadata = extract_metadata_from_wheel_file(entry.file_os_path())
    linkstore = stage.get_linkstore_perstage(link.project, link.version)
    project_dir = '%s/%s/+f/%s' % (linkstore.filestore.storedir,
                                   stage.name, project)

    # makedirs also creates missing parent directories, which a plain
    # mkdir would fail on
    if not os.path.isdir(project_dir):
        os.makedirs(project_dir)

    json_path = '%s/%s-%s.json' % (project_dir, project, new_version)
    with open(json_path, 'w') as fd:
        fd.write(json.dumps(metadata))

    threadlog.info("Stored %s to: %s", metadata, json_path)

    # We symlink the latest version
    symlink_path = '%s.json' % project_dir
    # lexists() is also true for a dangling symlink (e.g. the previous
    # target was removed); exists() would miss it and symlink() would
    # then fail because the link name is already taken
    if os.path.lexists(symlink_path):
        os.unlink(symlink_path)
    os.symlink(json_path, symlink_path)
Ejemplo n.º 23
0
    def validate(self, username, password):
        """ Bind against the LDAP server with the given credentials.

            The result is a dictionary with a ``status`` key and, when a
            group search is configured, the ``groups`` of the authenticated
            user. An unresolvable user yields the status "unknown"; a blank
            password or a failed bind yields the rejection result.
        """
        threadlog.debug("Validating user '%s' against LDAP at %s." % (username, self['url']))
        username = escape(username)
        userdn = self._userdn(username)
        if not userdn:
            return {"status": "unknown"}
        # a blank password is rejected without contacting the server
        if not password.strip():
            return self._rejection()
        conn = self.connection(self.server(), userdn=userdn, password=password)
        if not self._open_and_bind(conn):
            return self._rejection()
        outcome = {"status": "ok"}
        group_config = self.get('group_search', None)
        if group_config:
            outcome["groups"] = self._search(
                conn, group_config, username=username,
                userdn=userdn.encode("utf8"))
        return outcome
Ejemplo n.º 24
0
    def validate(self, username, password):
        """ Bind against the LDAP server with the given credentials.

            The result is a dictionary with a ``status`` key and, when a
            group search is configured, the ``groups`` of the authenticated
            user. An unresolvable user yields the status "unknown"; a blank
            password or a failed bind yields the rejection result.
        """
        threadlog.debug("Validating user '%s' against LDAP at %s." % (username, self['url']))
        username = escape(username)
        userdn = self._userdn(username)
        if not userdn:
            return {"status": "unknown"}
        # a blank password is rejected without contacting the server
        if not password.strip():
            return self._rejection()
        conn = self.connection(self.server(), userdn=userdn, password=password)
        if not self._open_and_bind(conn):
            return self._rejection()
        outcome = {"status": "ok"}
        group_config = self.get('group_search', None)
        if group_config:
            outcome["groups"] = self._search(
                conn, group_config, username=username, userdn=userdn)
        return outcome
Ejemplo n.º 25
0
 def queue_projects(self, projects, serial, searcher):
     """Queue ``projects`` for an index update.

     For every project the serial of its last change (looked up at
     ``serial``) is determined. Projects from mirrors are skipped when the
     'serial' stored for them in the index is not older than that; the
     remaining ones are collected per (indexname, change serial) and
     handed to ``self.extend`` in batches. All other projects are handed
     to ``self.add`` one by one.

     :param projects: iterable of project objects
     :param serial: serial at which the last change of each project is
         looked up
     :param searcher: searcher used to look up documents already stored
     """
     log.debug("Queuing projects for index update")
     # ``queued`` counts the projects that were not skipped
     queued_counter = itertools.count()
     queued = next(queued_counter)
     last_time = time.time()
     mirror_projects = {}
     processed = 0
     for processed, project in enumerate(projects, start=1):
         # progress is logged once more than 5 seconds have passed
         if time.time() - last_time > 5:
             last_time = time.time()
             log.debug(
                 "Processed a total of %s projects and queued %s so far. "
                 "Currently in %s" % (processed, queued, project.indexname))
         # we find the last serial the project was changed to avoid
         # re-indexing; the result is kept in its own variable so that
         # the lookup for the next project still happens at ``serial``
         # and not at the change serial of the previous project
         project_serial = project.stage.get_last_project_change_serial_perstage(
             project.name, at_serial=serial)
         if project.is_from_mirror:
             # mirrors have no docs, so we can shortcut
             path = '/%s/%s' % (project.indexname, project.name)
             existing = searcher.document(path=path)
             if existing:
                 existing_serial = existing.get('serial', -1)
                 if existing_serial >= project_serial:
                     continue
             key = (project.indexname, project_serial)
             _projects = mirror_projects.setdefault(key, [])
             _projects.append(project)
             # hand over a full batch and start collecting again
             if len(_projects) >= self.QUEUE_MAX_NAMES:
                 self.extend(_projects, project_serial)
                 _projects.clear()
         else:
             self.add(project, project_serial)
         queued = next(queued_counter)
     # hand over whatever is left in the per-index batches
     for (indexname, batch_serial), _projects in mirror_projects.items():
         self.extend(_projects, batch_serial)
     log.info("Processed a total of %s projects and queued %s" %
              (processed, queued))
Ejemplo n.º 26
0
def devpiserver_on_upload(stage, projectname, version, link):
    """ called when a file is uploaded to a private stage for
    a projectname/version.  link.entry.file_exists() may be false because
    a more recent revision deleted the file (and files are not revisioned).
    NOTE that this hook is currently NOT called for the implicit "caching"
    uploads to the pypi mirror.

    If the uploaded file is a wheel and is the latest version on this index,
    store its metadata in json file at the root of index/+f/ directory.
    With the standard config with nginx, nginx will directly serve this file.
    """
    if link.entry and link.entry.file_exists() and link.entry._filepath.endswith('.whl'):
        threadlog.info("Wheel detected: %s", link.entry._filepath)
        new_version = parse_version(version)
        latest_version = parse_version(stage.get_latest_version_perstage(projectname))
        if latest_version > new_version:
            # log the version that was actually compared against; the
            # previously referenced name did not exist in this function
            threadlog.debug("A newer release has already been uploaded: %s - nothing to do", latest_version)
            return
        metadata = extract_metadata_from_wheel_file(link.entry._filepath)
        linkstore = stage.get_linkstore_perstage(link.projectname, link.version)
        json_path = '%s/%s/+f/%s.json' % (linkstore.filestore.storedir, stage.name, projectname)
        with open(json_path, 'w') as fd:
            fd.write(json.dumps(metadata))
        threadlog.info("Stored %s to: %s", metadata, json_path)
Ejemplo n.º 27
0
def devpiserver_auth_user(userdict, username, password):
    """Authenticate through the module-level ``ldap`` object.

    The status "unknown" is returned when ``ldap`` is None.
    """
    if ldap is not None:
        return ldap.validate(username, password)
    threadlog.debug("No LDAP settings given on command line.")
    return {"status": "unknown"}
Ejemplo n.º 28
0
 def mock_simple_projects(self, projectlist):
     """Mock the mirror url response with one link line per project."""
     links = ['<a href="%s">%s</a>\n' % (name, name) for name in projectlist]
     page = "".join(links)
     threadlog.debug("patching simple page with: %s" % (page))
     self.httpget.mockresponse(self.mirror_url, code=200, text=page)
Ejemplo n.º 29
0
def debug(msg):
    """Log ``msg`` at debug level with a "devpi-rss: " prefix."""
    prefixed = "devpi-rss: %s" % msg
    threadlog.debug(prefixed)
Ejemplo n.º 30
0
def devpiserver_auth_user(userdict, username, password):
    """Authenticate through the module-level ``ldap`` object.

    The status "unknown" is returned when ``ldap`` is None.
    """
    if ldap is not None:
        return ldap.validate(username, password)
    threadlog.debug("No LDAP settings given on command line.")
    return {"status": "unknown"}
Ejemplo n.º 31
0
def debug(msg):
    """Log ``msg`` at debug level with a "devpi-rss: " prefix."""
    prefixed = "devpi-rss: %s" % msg
    threadlog.debug(prefixed)
Ejemplo n.º 32
0
 def _delete_project(self, indexname, project, serial, counter, writer,
                     searcher):
     """Delete the documents stored for the project's path via ``writer``.

     ``counter`` is advanced once per call.
     """
     doc_path = u"/%s/%s" % (indexname, project)
     writer.delete_by_term('path', doc_path, searcher=searcher)
     next(counter)
     log.debug("Removed %s from search index.", doc_path)