def __convert_findbugs_xml(url, findbugs_xml):
    """Parse a FindBugs XML report and collect the POM dependencies of its jar.

    The report is converted to a dict through ``xmldict`` (the ``@`` prefix
    is stripped from the keys), the artifact coordinates are looked up from
    ``url``, and the matching ``.pom`` is downloaded to a local file named
    after the jar, parsed for its ``dependency`` entries and then deleted.

    :param url: URL of the analysed jar; the POM URL is derived from it by
        replacing ``.jar`` with ``.pom``.
    :param findbugs_xml: FindBugs report, handed straight to
        ``xmldict.parse``.

    NOTE(review): this excerpt ends right after the POM handling, so the
    locals computed here (``result_json``, ``_version``, ``_artifact_id``,
    ``_group_id``, ``_dependencies``) are presumably consumed by code that
    is not visible -- confirm before removing any of them.
    """
    import json
    import xmldict

    result_json = json.loads(json.dumps(xmldict.parse(findbugs_xml)).replace('"@', '"'))

    # The lookup result used to be bound to ``url_arr`` while the lines below
    # read ``metadata``, which raised NameError; bind it to the name in use.
    metadata = get_metadata_from_url(url)

    _jar_filename = metadata['jar_filename']
    _version = metadata['version']
    _artifact_id = metadata['artifact_id']
    _group_id = metadata['group_id']

    # get pom information
    _pom_url = url.replace('.jar', '.pom')
    _pom_filename = _jar_filename.replace('.jar', '.pom')

    log.info('Downloading POM: %s -> %s' % (_pom_url, _pom_filename))
    urllib.urlretrieve(_pom_url, _pom_filename)
    _dependencies = []

    if os.path.exists(_pom_filename):
        try:
            # Close the handle deterministically instead of leaking it.
            with open(_pom_filename, 'r') as pom_file:
                _pom_json = json.loads(json.dumps(xmldict.parse(pom_file.read())))
            _dependencies = _pom_json.get('project', {}).get('dependencies', {}).get('dependency', [])
        except Exception:
            # Best effort: a missing or malformed POM only means no dependencies.
            log.warn('Could not download/parse data from %s' % (_pom_filename,))

        # The POM is a temporary download; always clean it up.
        os.remove(_pom_filename)
def convert_findbugs_xml(findbugs_xml):
    """Load a FindBugs XML report from disk and read the jar's POM dependencies.

    The report is parsed with ``xmldict`` (the ``@`` prefix is stripped from
    the keys) and the jar path is taken from ``BugCollection/Project/Jar``.
    The POM path is the jar path with ``.jar`` replaced by ``.pom``; when
    that file exists its ``dependency`` entries are read.

    :param findbugs_xml: path of the FindBugs XML report file.
    :raises Exception: when the report has no ``Jar`` entry.

    NOTE(review): this excerpt ends after the POM handling; the remaining
    locals (``_metadata_filename``, ``_versions``, ``_version_order``, ...)
    are presumably used by code that is not visible here.
    """
    import xmldict
    import json

    # Read through a context manager so the report handle is always closed.
    with open(findbugs_xml, "r") as report_file:
        xml_data = report_file.read()
    result_json = json.loads(json.dumps(xmldict.parse(xml_data)).replace('"@', '"'))

    _full_name = result_json.get("BugCollection", {}).get("Project", {}).get("Jar", None)

    if _full_name is None:
        raise Exception("Jar tag is not found (%s)" % (result_json,))

    print("Processing ... %s" % (_full_name,))

    _jar_filename = os.path.basename(_full_name)
    _pom_filename = _full_name.replace(".jar", ".pom")
    # maven-metadata.xml sits two path components above the jar file.
    _metadata_filename = "%s/maven-metadata.xml" % ("/".join(_full_name.split("/")[:-2]),)
    _dependencies = []
    _versions = []

    _version = None
    _artifact_id = None
    _group_id = None
    _version_order = 0

    if os.path.exists(_pom_filename):
        try:
            with open(_pom_filename, "r") as pom_file:
                _pom_json = json.loads(json.dumps(xmldict.parse(pom_file.read())))
            _dependencies = _pom_json.get("project", {}).get("dependencies", {}).get("dependency", [])
        except Exception as e:
            # Best effort: an unreadable POM leaves the dependency list empty.
            print("Could not download/parse data from %s (%s)" % (_pom_filename, e))
# Example #3
# 0
def main():
    """Download every listed version of each project's jar from the Maven mirror.

    Reads ``data/bugless_projects.json`` line by line; the first two
    ``||``-separated fields of a line are the group id and the artifact id.
    For each project the ``maven-metadata.xml`` is saved as
    ``work/maven-metadata.xml.<line number>``, its version list is read, and
    each ``<artifact>-<version>.jar`` is downloaded into ``work/``. The URL
    of every downloaded jar is printed.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2/'

    # The context manager closes the project list even if a download fails.
    with open('data/bugless_projects.json', 'r') as fp:
        for c, l in enumerate(fp, 1):
            arr = l.strip().split('||')
            group_id = arr[0]
            artifact_id = arr[1]

            maven_base_url = '%s%s/%s/' % (base_url, group_id.replace(
                '.', '/'), artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url, )
            local_maven_metadata_name = 'work/maven-metadata.xml.%d' % (c, )
            urllib.urlretrieve(maven_metadata_name, local_maven_metadata_name)

            with open(local_maven_metadata_name, 'r') as metadata_file:
                json_xml = xmldict.parse(metadata_file.read())
            versions = json_xml.get('metadata', {}).get('versioning',
                                                        {}).get('versions',
                                                                {}).get('version')

            if versions is None:
                # No <version> entry: nothing to fetch (previously this tried
                # to download "<artifact>-None.jar").
                version_list = []
            elif isinstance(versions, list):
                version_list = versions
            else:
                # A single <version> element is not wrapped in a list.
                version_list = [versions]

            for v in version_list:
                vfile = '%s-%s.jar' % (artifact_id, v)
                vfile_url = '%s%s/%s' % (maven_base_url, v, vfile)
                urllib.urlretrieve(vfile_url, 'work/%s' % (vfile, ))
                print(vfile_url)
# Example #4
# 0
def main():
    """Fetch all published jars for the projects in data/bugless_projects.json.

    Each input line holds ``group_id||artifact_id``. The project's
    ``maven-metadata.xml`` is stored as ``work/maven-metadata.xml.<n>`` (``n``
    is the 1-based line number), parsed for its version list, and every
    ``<artifact>-<version>.jar`` is downloaded into ``work/``. Each jar URL
    is printed after its download.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2/'

    # ``with`` guarantees the project list is closed even when a download raises.
    with open('data/bugless_projects.json', 'r') as fp:
        for c, l in enumerate(fp, 1):
            arr = l.strip().split('||')
            group_id = arr[0]
            artifact_id = arr[1]

            maven_base_url = '%s%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)
            local_maven_metadata_name = 'work/maven-metadata.xml.%d' % (c,)
            urllib.urlretrieve(maven_metadata_name, local_maven_metadata_name)

            with open(local_maven_metadata_name, 'r') as metadata_file:
                json_xml = xmldict.parse(metadata_file.read())
            versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')

            if versions is None:
                # Metadata without any <version>: skip instead of requesting
                # a bogus "<artifact>-None.jar".
                version_list = []
            elif isinstance(versions, list):
                version_list = versions
            else:
                # A lone <version> element comes back as a bare value.
                version_list = [versions]

            for v in version_list:
                vfile = '%s-%s.jar' % (artifact_id, v)
                vfile_url = '%s%s/%s' % (maven_base_url, v, vfile)
                urllib.urlretrieve(vfile_url, 'work/%s' % (vfile,))
                print(vfile_url)
# Example #5
# 0
def convert_findbugs_xml(findbugs_xml):
    """Read a FindBugs XML report file and the dependencies from the jar's POM.

    The report is turned into a dict with ``xmldict`` (``@`` is stripped from
    the start of keys). The analysed jar's path comes from
    ``BugCollection/Project/Jar``; the POM is expected beside it with a
    ``.pom`` extension and, if present, supplies the ``dependency`` list.

    :param findbugs_xml: path of the FindBugs XML report file.
    :raises Exception: when the report has no ``Jar`` entry.

    NOTE(review): the excerpt stops after the POM handling; locals such as
    ``_metadata_filename``, ``_versions`` and ``_version_order`` are
    presumably used by code not visible here.
    """
    import xmldict
    import json

    # Close the report handle deterministically instead of leaking it.
    with open(findbugs_xml, 'r') as report_file:
        xml_data = report_file.read()
    result_json = json.loads(
        json.dumps(xmldict.parse(xml_data)).replace('"@', '"'))

    _full_name = result_json.get('BugCollection', {}).get('Project',
                                                          {}).get('Jar', None)

    if _full_name is None:
        raise Exception('Jar tag is not found (%s)' % (result_json, ))

    print('Processing ... %s' % (_full_name, ))

    _jar_filename = os.path.basename(_full_name)
    _pom_filename = _full_name.replace('.jar', '.pom')
    # maven-metadata.xml lives two path components above the jar file.
    _metadata_filename = '%s/maven-metadata.xml' % ('/'.join(
        _full_name.split('/')[:-2]), )
    _dependencies = []
    _versions = []

    _version = None
    _artifact_id = None
    _group_id = None
    _version_order = 0

    if os.path.exists(_pom_filename):
        try:
            with open(_pom_filename, 'r') as pom_file:
                _pom_json = json.loads(
                    json.dumps(xmldict.parse(pom_file.read())))
            _dependencies = _pom_json.get('project',
                                          {}).get('dependencies',
                                                  {}).get('dependency', [])
        except Exception as e:
            # Best effort: an unreadable POM leaves the dependency list empty.
            print('Could not download/parse data from %s (%s)' % (
                _pom_filename, e))
def main():
    """Report jar versions in the local Maven mirror that are not in MongoDB.

    For every project from ``load_projects_json()`` the local
    ``maven-metadata.xml`` is read and each listed version is looked up with
    ``get_version``. A version with no documents is counted as missing. If
    its jar exists locally and ``has_classes`` accepts it, a ``findbugs``
    command line for that jar is printed on stdout. Otherwise the version is
    also counted as "really missing". Progress and the final totals are
    written to stderr.
    """
    base_url = '/Users/bkarak/devel/repositories/maven/maven/'
    col_obj = get_mongo_connection()[MONGO_COL]
    projects = load_projects_json()

    total_jars = 0
    missing = 0
    really_missing = 0

    for proj in projects:
        group_id = proj.group_id().strip()
        artifact_id = proj.artifact_id().strip()
        maven_base_url = '%s%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
        maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)

        if not os.path.exists(maven_metadata_name):
            continue

        # Read through a context manager so the handle is not leaked.
        with open(maven_metadata_name, 'r') as metadata_file:
            json_xml = xmldict.parse(metadata_file.read())
        versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')

        # A single <version> element is not wrapped in a list.
        version_list = versions if isinstance(versions, list) else [versions]

        for v in version_list:
            if v is not None:
                v = v.strip()
            docs = get_version(col_obj, group_id, artifact_id, v)
            total_jars += 1

            if len(docs) != 0:
                # Already stored; nothing to report.
                continue

            missing += 1
            sys.stderr.write('[%d]: Missing %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
            local_jar_path = '%s%s/%s-%s.jar' % (maven_base_url, v, artifact_id, v)

            if not os.path.exists(local_jar_path):
                sys.stderr.write('[%d]: Invalid Jar: %s||%s||%s\n' % (total_jars, group_id, artifact_id, v))
                really_missing += 1
            elif has_classes(local_jar_path):
                sys.stderr.write('ADDED: Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
                # Only the command line is emitted; it is not executed here.
                print("findbugs -textui -xml -output `basename %s`-findbugs.xml %s" % (local_jar_path, local_jar_path))
            else:
                really_missing += 1
                sys.stderr.write('HAS_NO_CLASSES: %s\n' % (local_jar_path,))

    sys.stderr.write('Total: %d, Missing: %d (%d)\n' % (total_jars, missing - really_missing, missing))
def main():
    """List versions on the Maven mirror that are missing from MongoDB.

    Reads ``group_id||artifact_id`` lines from ``data/missing_versions.txt``,
    fetches each project's ``maven-metadata.xml`` from the mirror and looks
    every listed version up with ``get_version``. A version with no documents
    is printed as missing; when ``url_exists`` also reports that the jar is
    absent from the mirror, the jar's host and path are printed as well.
    Errors while handling one project are printed and the loop moves on.

    NOTE(review): ``missing`` and ``really_missing`` are counted but never
    reported in the visible code -- a summary may have been cut off.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2'
    col_obj = get_mongo_connection()[MONGO_COL]

    total_jars = 0
    missing = 0
    really_missing = 0

    # The original rebound ``fp`` to the HTTP response inside the loop, which
    # shadowed the input file and left both handles unclosed.
    with open('data/missing_versions.txt', 'r') as fp:
        for line in fp:
            (group_id, artifact_id) = line.strip().split('||')
            maven_base_url = '%s/%s/%s/' % (base_url, group_id.replace(
                '.', '/'), artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url, )

            try:
                response = urlopen(maven_metadata_name)
                try:
                    json_xml = xmldict.parse(response.read())
                finally:
                    response.close()

                versions = json_xml.get('metadata',
                                        {}).get('versioning',
                                                {}).get('versions',
                                                        {}).get('version')

                # A single <version> element is not wrapped in a list.
                if isinstance(versions, list):
                    version_list = versions
                else:
                    version_list = [versions]

                for v in version_list:
                    docs = get_version(col_obj, group_id, artifact_id, v)
                    total_jars += 1

                    if len(docs) == 0:
                        missing += 1
                        print('[%d]: Missing %s||%s||%s' % (total_jars, group_id,
                                                            artifact_id, v))
                        url_jar_path = '/maven2/%s/%s/%s/%s-%s.jar' % (
                            group_id.replace('.', '/'), artifact_id, v,
                            artifact_id, v)

                        if not url_exists('mirrors.ibiblio.org', url_jar_path):
                            really_missing += 1
                            print('%s%s' % ('mirrors.ibiblio.org', url_jar_path))
            except Exception as e:
                # Best effort per project: report the failure and keep going.
                print('[%d] ERROR: %s (%s)' % (total_jars, maven_metadata_name, e))
def main():
    """Check which mirror versions of the listed projects are absent from MongoDB.

    Every line of ``data/missing_versions.txt`` is ``group_id||artifact_id``.
    The project's ``maven-metadata.xml`` is fetched from the mirror and each
    version in it is queried through ``get_version``. Versions without
    documents are printed as missing; if ``url_exists`` says the jar is not
    on the mirror either, its host and path are printed too. A failure for
    one project is printed and processing continues with the next line.

    NOTE(review): ``missing`` and ``really_missing`` are accumulated but not
    reported in the visible code -- a final summary may be missing.
    """
    base_url = 'http://mirrors.ibiblio.org/maven2'
    col_obj = get_mongo_connection()[MONGO_COL]

    total_jars = 0
    missing = 0
    really_missing = 0

    # Keep the input file and the HTTP response under separate names; the
    # original reused ``fp`` for both, so neither handle was ever closed.
    with open('data/missing_versions.txt', 'r') as fp:
        for line in fp:
            (group_id, artifact_id) = line.strip().split('||')
            maven_base_url = '%s/%s/%s/' % (base_url, group_id.replace('.', '/'), artifact_id)
            maven_metadata_name = '%smaven-metadata.xml' % (maven_base_url,)

            try:
                response = urlopen(maven_metadata_name)
                try:
                    json_xml = xmldict.parse(response.read())
                finally:
                    response.close()

                versions = json_xml.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version')

                # A lone <version> element comes back as a bare value.
                if isinstance(versions, list):
                    version_list = versions
                else:
                    version_list = [versions]

                for v in version_list:
                    docs = get_version(col_obj, group_id, artifact_id, v)
                    total_jars += 1

                    if len(docs) == 0:
                        missing += 1
                        print('[%d]: Missing %s||%s||%s' % (total_jars, group_id, artifact_id, v))
                        url_jar_path = '/maven2/%s/%s/%s/%s-%s.jar' % (group_id.replace('.', '/'), artifact_id, v, artifact_id, v)

                        if not url_exists('mirrors.ibiblio.org', url_jar_path):
                            really_missing += 1
                            print('%s%s' % ('mirrors.ibiblio.org', url_jar_path))
            except Exception as e:
                # Best effort per project: report the failure and keep going.
                print('[%d] ERROR: %s (%s)' % (total_jars, maven_metadata_name, e))
#   1. Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# Test Naive conversion of Findbugs XML to JSON (using the xmldict utility) 
#
# Authors:
#    Vassilios Karakoidas ([email protected])

import xmldict, json

# Print the sample FindBugs report as JSON, stripping the '@' prefix from keys.
# The context manager closes the report once it has been converted.
# NOTE(review): the open file object itself is handed to xmldict.parse here;
# confirm that parse accepts file objects as well as XML strings.
with open('acegi-security-0.5.jar-findbugs.xml', 'r') as findbugs_xml:
    print(json.dumps(xmldict.parse(findbugs_xml)).replace('"@','"'))
                        except Exception, e:
                            log.warn('Could not download/parse data from %s' % (_pom_filename,))

                        os.remove(_pom_filename)

                    # get xml information                    
                    _metadata_url = url.replace('%s/%s' % (_version, _jar_filename), 'maven-metadata.xml')
                    _metadata_filename = '%s-metadata.xml' % (_jar_filename,)
                    
                    log.info('Downloading %s -> %s' % (_metadata_url, _metadata_filename))
                    urllib.urlretrieve(_metadata_url, _metadata_filename)
                    _version_order = 0

                    if os.path.exists(_metadata_filename):
                        try:
                            _metadata_json = json.loads(json.dumps(xmldict.parse(open(_metadata_filename, 'r').read())))
                            _versions = _metadata_json.get('metadata', {}).get('versioning', {}).get('versions', {}).get('version', [])

                            if not isinstance(_versions, list):
                                _versions = [_versions]

                            _versions = [x.strip() for x in _versions]                            
                            try:
                                _version_order = _versions.index(_version.strip()) + 1
                            except ValueError, ve:
                                log.warn('Could not find version (%s in %s): %s' % (_version, _versions, ve))
                        except Exception, e:
                            log.warn('Could not parse data from %s: %s' % (_metadata_filename, e))

                        os.remove(_metadata_filename)
#   1. Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#
# Test Naive conversion of Findbugs XML to JSON (using the xmldict utility)
#
# Authors:
#    Vassilios Karakoidas ([email protected])

import xmldict, json

# Convert the sample FindBugs report to JSON (dropping the '@' key prefix) and
# print it; ``with`` makes sure the report file is closed afterwards.
# NOTE(review): xmldict.parse receives the file object rather than its text;
# verify that this is a supported input.
with open('acegi-security-0.5.jar-findbugs.xml', 'r') as findbugs_xml:
    print(json.dumps(xmldict.parse(findbugs_xml)).replace('"@', '"'))