Ejemplo n.º 1
0
def populate():
    session = DBSession()
    root = Root(name=u'PyPI')
    session.add(root)

    client = xmlrpclib.ServerProxy('http://pypi.python.org/pypi')
    packages = client.list_packages()

    # Do it in parallel to go faster
    results = pool.map(ingest_package, packages)

    for i, result in enumerate(results):
        package = result['name']
        print "Populating DB with:", i, package

        # Query for it first...
        if Package.query.filter_by(name=package).count() > 0:
            print "Package '%s' is already in the DB.  Skipping." % package
            continue

        p = Package(name=package, root=root)
        session.add(p)

        for release_data in result['releases']:
            release = release_data['name']
            data = release_data['data']

            r = Release(
                name=release,
                package=p,
                summary=data.get('summary', '')
            )

            for classifier in data['classifiers']:
                query = Classifier.query.filter_by(name=classifier)
                if query.count() == 0:
                    k = Classifier(name=classifier)
                    session.add(k)

                k = Classifier.query.filter_by(name=classifier).one()
                r.classifiers.append(k)

            for keyword in (data['keywords'] or '').split():
                query = Keyword.query.filter_by(name=keyword)
                if query.count() == 0:
                    k = Keyword(name=keyword)
                    session.add(k)

                k = Keyword.query.filter_by(name=keyword).one()
                r.keywords.append(k)

            if 'maintainer' in data:
                query = Maintainer.query.filter_by(name=data['maintainer'])
                if query.count() == 0:
                    a = Maintainer(name=data['maintainer'],
                                   email=data.get('maintainer_email'))
                    session.add(a)

                a = Maintainer.query.filter_by(name=data['maintainer']).one()
                r.maintainer = a

            if 'author' in data:
                query = Author.query.filter_by(name=data['author'])
                if query.count() == 0:
                    a = Author(name=data['author'],
                               email=data.get('author_email'))
                    session.add(a)

                a = Author.query.filter_by(name=data['author']).one()
                r.author = a

            if 'license' in data:
                query = License.query.filter_by(name=data['license'])
                if query.count() == 0:
                    l = License(name=data['license'])
                    session.add(l)

                l = License.query.filter_by(name=data['license']).one()
                r.license = l

            session.add(r)

    session.commit()