Esempio n. 1
0
File: Diff.py Progetto: cscorley/ohm
 def _printToLog(self, source, revision_number, log):
     """Append the entries of `log` to this project's error log file.

     The file lives under /tmp/ohm/ and is named after the project with an
     '-errors.log' suffix. Nothing is written when `log` is empty. Each
     entry is read at indexes 0, 1 and 2: the first two are concatenated
     as-is and the third is converted with str().
     """
     if len(log) == 0:
         return

     current_rev = self.project_repo.revCurr
     _make_dir('/tmp/ohm/')
     log_path = '/tmp/ohm/' + self.project_repo.project.name + '-errors.log'
     with open(log_path, 'a') as log_file:
         # Separator between the batches appended by successive calls.
         log_file.write("\n\n***********************************\n\n")
         for entry in log:
             # First line: timestamp, current revision, source, revision.
             header = (str(datetime.now()) + ' ' + str(current_rev)
                       + ' ' + source + ' ' + str(revision_number))
             # Two tab-indented detail lines taken from the entry itself.
             detail = ('\n\t' + entry[0] + ' ' + entry[1]
                       + '\n\t' + str(entry[2]))
             log_file.write(header + detail + '\n')
Esempio n. 2
0
    def get_file(self, file_name, revision_number, tries=5):
        """Export one file from the project's SVN URL and return its text.

        `revision_number` of None is treated as revision 0. The file is
        exported to '/tmp/ohm/<project name>-svn<file_name>' and that local
        copy is then read back. A ClientError with code 175002 is retried,
        consuming one of `tries` attempts each time; any other ClientError
        stops the attempts immediately.

        NOTE(review): the local copy is opened even when no export
        succeeded, so this may raise or return previously exported
        content -- confirm callers expect that.
        """
        if revision_number is None:
            revision_number = 0
        rev = pysvn.Revision(pysvn.opt_revision_kind.number, revision_number)

        # Normalise the join point: no trailing slash on the URL, exactly
        # one leading slash on the file name, so url + file_name is valid.
        url = self.project.url
        url = url[:-1] if url.endswith('/') else url
        if not file_name.startswith('/'):
            file_name = '/' + file_name

        # TODO: handle spaces? Used to work, but does not seem to be now

        # Local destination of the export; its parent directory must exist.
        output = '/tmp/ohm/' + self.project.name + '-svn' + file_name
        _make_dir(output.rpartition('/')[0])

        attempts_left = tries
        while attempts_left > 0:
            try:
                self.client.export(url + file_name, output,
                        revision=rev, recurse=False)
            except pysvn.ClientError as err:
                message, code = err.args[1][0]
                print('Code:', code, 'Message:', message, '\n',
                        file_name, revision_number)
                if code != 175002:
                    # Some other error, just quit trying
                    break
                # Retry to check out the file
                attempts_left -= 1
            else:
                # success!
                break

        with open(output, 'r') as exported:
            file_contents = exported.read()

        return file_contents
Esempio n. 3
0
File: ohm.py Progetto: cscorley/ohm
def main(argv):
    """Parse the command line in `argv` and run the requested ohm actions.

    A project name (-n) is mandatory and must be a key of config.projects.
    When a second project (-m) is given, only dual_speed_run() is executed.
    Otherwise the output directory is created, a Database connection is
    opened and the selected actions run in this order: tester (-t, which
    exits afterwards), force drop (-f), speed run (-s), build (-b),
    generate (-g).

    This function never returns normally: it always ends through
    sys.exit() or optparser.error(). The exit status is 0 on success,
    1 when a project is missing from config.py and 2 for usage errors
    reported by the option parser.
    """
    def _project_from_config(name):
        """Return config.projects[name], exiting if the name is unknown."""
        if name not in config.projects:
            print('Project information not in config.py')
            # This is a failure, so signal it through a non-zero status.
            sys.exit(1)
        return config.projects[name]

    # Configure option parser
    optparser = OptionParser(usage='%prog [options]', version='0.1')
    optparser.set_defaults(
        force_drop=False,
        verbose=False,
        generate=False,
        build_db=False,
        tester=False,
        speed_run=False,
        output_dir='/tmp/ohm',
        project_revision='-1',
        project_revision_end='-1',
        database_host='localhost',
        database_port='5432',
        database_user='******',
        database_password='******',
        database_db='ohmdb',
    )
    optparser.add_option('-o', '--output-dir', dest='output_dir',
            help='Output directory')
    optparser.add_option('-n', '--project_name', dest='project_name',
            help='Project name')
    optparser.add_option('-m', '--project_name2', dest='project_name2',
            help='Project name')
    optparser.add_option('-r', '--revision', dest='project_revision',
            help='Project revision to begin upon')
    optparser.add_option('-e', '--revision_end', dest='project_revision_end',
            help='Project revision to stop after')
    optparser.add_option('-f', '--force_drop', dest='force_drop',
            help='Drop all tables before beginning', action='store_true')
    optparser.add_option('-v', '--verbose', dest='verbose',
            help='Be verbose in output', action='store_true')
    optparser.add_option('-g', '--generate', dest='generate',
            help='Generate vectors', action='store_true')
    optparser.add_option('-t', '--tester', dest='tester',
            help='Run tester function', action='store_true')
    optparser.add_option('-s', '--speed_run', dest='speed_run',
            help='Run without database interactions', action='store_true')
    optparser.add_option('-b', '--build', dest='build_db',
            help='Run analysis and build database', action='store_true')
    optparser.add_option('-a', '--host', dest='database_host',
            help='Use a custom database host address')
    optparser.add_option('-p', '--port', dest='database_port',
            help='Use a custom database host port')
    optparser.add_option('-u', '--username', dest='database_user',
            help='Use a custom database username')
    optparser.add_option('-P', '--password', dest='database_password',
            help='Use a custom database password')
    optparser.add_option('-d', '--database', dest='database_db',
            help='Use a custom database')

    # Invoke option parser
    options, _args = optparser.parse_args(argv)

    starting_revision = options.project_revision
    ending_revision = options.project_revision_end

    if options.project_name is None:
        # error() prints the usage message and exits with status 2.
        optparser.error('You must supply a project name!')
    project = _project_from_config(options.project_name)

    if options.project_name2 is not None:
        # Two projects requested: compare them and stop, skipping every
        # database-backed action below.
        project2 = _project_from_config(options.project_name2)
        dual_speed_run(project, project2)
        sys.exit(0)

    # create output directory
    tmp_dir = options.output_dir.rstrip('/')
    if not os.path.exists(tmp_dir):
        _make_dir(tmp_dir)

    # open database connection
    db = Database(
            host=options.database_host,
            port=options.database_port,
            user=options.database_user,
            password=options.database_password,
            database=options.database_db,
            verbose=options.verbose
            )

    if options.tester:
        compare_git_svn(db, config.projects["jhotdraw-git"],
                config.projects["jhotdraw"])
        sys.exit(0)

    if options.force_drop:
        db.force_drop()

    if options.speed_run:
        speed_run(project, starting_revision, ending_revision)

    if options.build_db:
        build_db(db, project, starting_revision, ending_revision)

    if options.generate:
        generate(db, project, starting_revision,
                ending_revision, False,
                ('class', 'enum', 'interface', '@interface'),
                profile_name='class_') # just leave name as 'profiles.txt'

        # generate the methods
        generate(db, project, starting_revision,
                ending_revision, False,
                ('method', ), profile_name='method_')

        # the following will seem weird, but in the database the full_name()
        # function will ignore file types when building the block's full name
        # so if we use it on the file type, we just get the package name.
        # generate will merge all the duplicate information into one package
        # for us afterward.

        # warning: if a file did not have an associated package, it will show
        # up in this list rather than the package name. this is a nifty
        # workaround to tracking package changes via the file changes.
        generate(db, project, starting_revision,
                ending_revision, False,
                ('file', ), profile_name='package_')

        # here, we will disable generate's use of the full_name in its
        # queries, giving us only the file name (and more importantly,
        # excluding the package)
        generate(db, project, starting_revision,
                ending_revision, False,
                ('file', ), profile_name='file_',
                no_full_name_func=True)

    if not (options.force_drop or options.build_db or options.generate or
            options.speed_run):
        # Adjacent literals are used instead of backslash continuations so
        # the source indentation does not leak into the message.
        optparser.error('Did not have any action to perform. Must either '
                'drop tables (-f), build tables (-b), generate vectors '
                'from tables (-g), or do a speed run (-s)')

    sys.exit(0)
Esempio n. 4
0
File: ohm.py Progetto: cscorley/ohm
def generate(db, project, starting_revision, ending_revision, use_sums,
        type_list, profile_name='', no_full_name_func=False):
    """Write ownership profile files for the blocks of `project`.

    Two files are written under '/tmp/ohm/<project name>-r<revision>/',
    where the revision is the first row of the project's revisions ordered
    by id descending:

    * 'key.txt' holds the second column of every owner row, one per line.
    * '<profile_name>profiles.txt' holds one line per block name: the
      name, a space, then the comma-separated profile values.

    Blocks whose name occurs more than once are merged into a single line
    by adding their profiles together per owner.

    Parameters:
        db: database wrapper providing execute() and a `cursor` attribute.
        project: object with `name` and `url` attributes.
        starting_revision, ending_revision: accepted but not used here.
        use_sums: read from 'change_data_sums' when true, otherwise from
            'change_data_count'.
        type_list: block types to include; one SQL placeholder is
            generated per entry.
        profile_name: prefix for the 'profiles.txt' file name.
        no_full_name_func: use the stored block.full_name column instead
            of the full_name(block.id) SQL function.

    Returns None. Prints an error and returns early when the project has
    no revisions.
    """
    def _profile_line(name, profile):
        """Format one output line: name, space, comma-joined values."""
        return name + ' ' + ','.join(str(v) for v in profile.values()) + '\n'

    # from type list, build query info: one placeholder per requested type.
    # join() is used because rstrip() strips a character set, not a suffix.
    typestr = ' or '.join(['block.type=%s'] * len(type_list))

    # set the name string used in the queries.
    if no_full_name_func:
        # just use the block's saved full_name as-is
        namestr = 'block.full_name'
    else:
        # use the sql function instead to build the full_name
        namestr = 'full_name(block.id)'

    # this dictionary is used throughout as a unique properties dictionary
    # used to get the UID of the entries in the table its used for. It should
    # always be reassigned when used.
    propDict = {
            'name': project.name,
            'url': project.url
            }
    # get the project uid
    pid = getUID(db, 'project', ('url',), propDict)

    revisions = db.execute('SELECT number from revision where project=%s \
            order by id desc',
            (pid, ))

    if not revisions:
        print('Error: project has not been built yet, use -b')
        return

    output_dir = '/tmp/ohm/{name}-r{revision}/'.format(name=project.name,
            revision=revisions[0][0])
    if not os.path.exists(output_dir):
        _make_dir(output_dir)

    owner_remap(db, project.name, pid)
    owners = db.execute('SELECT * from owner where project=%s',
            (pid, ))

    with open(output_dir + 'key.txt', 'w') as f:
        for each in owners:
            f.write('%s\n' % each[1])

    # Every profile starts with one zeroed slot per owner, in owner order.
    owner_ids = [o[0] for o in owners]

    # before we start generating class vectors, lets build a list of
    # duplicates to save off for merging later
    dup_results = db.execute('select {name} from block where \
            project=%s and ({types}) group by {name} \
            having (count({name}) > 1)'.format(name=namestr, types=typestr),
            (pid, ) + tuple(type_list))

    # duplicated name -> list of the per-block profiles collected for it
    duplicated_profiles = {d[0]: [] for d in dup_results}

    data_table = 'change_data_sums' if use_sums else 'change_data_count'

    c = db.cursor
    c.execute('SELECT block.id, {name}, {table}.sum, owner_id \
            from {table} join block on {table}.block_id = block.id \
            where block.project=%s and \
            ({types})'.format(table=data_table,name=namestr,types=typestr),
            (pid, ) + tuple(type_list))

    def _record(out, name, profile):
        """Stash the profile of a duplicated name, or write it out now."""
        if name in duplicated_profiles:
            duplicated_profiles[name].append(profile)
        else:
            out.write(_profile_line(name, profile))

    curr_id = -1
    curr_full_name = ''
    ownership_profile = dict.fromkeys(owner_ids, 0)

    with open(output_dir + profile_name + 'profiles.txt', 'w') as f:
        # NOTE(review): grouping assumes all rows of one block arrive
        # consecutively; the query has no ORDER BY -- confirm.
        for block_id, full_name, amount, owner_id in c:
            if curr_id != block_id:
                # A new block begins: finish the previous one, if any.
                if curr_id != -1:
                    _record(f, curr_full_name, ownership_profile)
                curr_id = block_id
                curr_full_name = full_name
                ownership_profile = dict.fromkeys(owner_ids, 0)

            ownership_profile[owner_id] = amount

        # Finish the last block after the loop, so this does not depend on
        # the cursor exposing rownumber/rowcount.
        if curr_id != -1:
            _record(f, curr_full_name, ownership_profile)

        # Merge the profiles gathered for each duplicated name by adding
        # the values owner by owner, then write one line per name.
        for name, profiles in duplicated_profiles.items():
            if not profiles:
                continue
            merged = dict(profiles[0])
            for profile in profiles[1:]:
                for owner_id, amount in profile.items():
                    merged[owner_id] = merged.get(owner_id, 0) + amount
            f.write(_profile_line(name, merged))