Example #1
    def analyse(self):

        ending = ''
        # Set a glob pattern to differentiate Python and Java source files
        if self.repository.language == "Python":
            ending = '**/*.py'
        elif self.repository.language == "Java":
            ending = '**/*.java'
        else:
            print(
                "Error in analyse: language of repository matches neither Python nor Java"
            )
            return

        directory_in_str = self.path + self.repository.name

        pathlist = Path(directory_in_str).glob(ending)

        # Loop over every matching source file in the repository and analyse it
        for path in pathlist:
            # path is a pathlib.Path object, not a string
            path_in_str = str(path)
            analysis = pygount.source_analysis(path_in_str,
                                               self.repository.language,
                                               encoding='utf-8')
            self.codelines += analysis.code + analysis.string
            self.emptylines += analysis.empty
            self.commentlines += analysis.documentation

        print("Analysed Repository: ", self.repository.name)
        return
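All of these examples revolve around the same pygount call; for reference, it can be exercised on its own like this (the file name is hypothetical):

import pygount

# Analyse a single file; the second argument is an arbitrary group label.
analysis = pygount.source_analysis('example.py', 'demo', encoding='utf-8')
print(analysis.language, analysis.code, analysis.documentation,
      analysis.empty, analysis.string)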
Example #2
def analyze(repo):
    extensions = LANGUAGES[repo['language']]['extensions']

    r = Repo(repo['owner'],
             repo['name'],
             default_branch=repo['default_branch'],
             language=repo['language'],
             extensions=extensions)

    json_path = f'{r.path}.json'

    if os.path.isfile(json_path):
        repo = read_json(json_path)
        return repo

    repo['code'], repo['documentation'], repo['empty'] = (0, 0, 0)

    with r:
        for f in r.files:
            # Ignore symlinks.
            if not os.path.isfile(f):
                continue

            analysis = pygount.source_analysis(f,
                                               repo['language'],
                                               encoding='utf-8')

            if analysis.state == 'analyzed':
                repo['code'] += analysis.code + analysis.string
                repo['documentation'] += analysis.documentation
                repo['empty'] += analysis.empty

    write_json(json_path, repo)

    return repo
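A hypothetical call of this helper, assuming LANGUAGES, Repo, read_json and write_json come from the surrounding project:

repo = analyze({
    'owner': 'octocat',          # hypothetical repository
    'name': 'Hello-World',
    'default_branch': 'master',
    'language': 'Python',
})
print(repo['code'], repo['documentation'], repo['empty'])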
Example #3
    def button_analyse_code(self):
        IrModuleAuthor = self.env['ir.module.author']
        IrModuleTypeRule = self.env['ir.module.type.rule']
        rules = IrModuleTypeRule.search([])

        cfg = self.env['ir.config_parameter']
        val = cfg.get_param('module_analysis.exclude_directories', '')
        exclude_directories = [x.strip() for x in val.split(',') if x.strip()]
        val = cfg.get_param('module_analysis.exclude_files', '')
        exclude_files = [x.strip() for x in val.split(',') if x.strip()]

        for module in self:
            _logger.info("Analysing code for module %s ...", module.name)

            # Update Authors, based on manifest key
            authors = []
            if module.author and module.author[0] == '[':
                author_txt_list = safe_eval(module.author)
            else:
                author_txt_list = module.author.split(',')

            author_txt_list = [x.strip() for x in author_txt_list]
            author_txt_list = [x for x in author_txt_list if x]
            for author_txt in author_txt_list:
                authors.append(IrModuleAuthor._get_or_create(author_txt))

            author_ids = [x.id for x in authors]
            module.author_ids = author_ids

            # Update Module Type, based on rules
            module_type_id = rules._get_module_type_id_from_module(module)
            module.module_type_id = module_type_id

            # Get Path of module folder and parse the code
            module_path = get_module_path(module.name)

            # Get Files
            analysed_datas = self._get_analyse_data_dict()
            file_extensions = analysed_datas.keys()
            file_list = self._get_files_to_analyse(module_path,
                                                   file_extensions,
                                                   exclude_directories,
                                                   exclude_files)

            for file_path, file_ext in file_list:
                file_res = pygount.source_analysis(
                    file_path,
                    '',
                    encoding=self._get_module_encoding(file_ext))
                for k, v in analysed_datas.get(file_ext).items():
                    v['value'] += getattr(file_res, k)

            # Update the module with the collected data
            values = {}
            for file_ext, analyses in analysed_datas.items():
                for k, v in analyses.items():
                    values[v['field']] = v['value']
            module.write(values)
Example #4
    def scan_sloc(self, repo_name, repo_path, repo_owner, skip_path, suffix):
        db_session = self.create_db_session()
        row_repo = self.load_repository(db_session, repo_name)
        if row_repo is None:
            row_repo = Repository(name=repo_name, path=repo_path, owner=repo_owner)
            db_session.add(row_repo)

        gitrepo = GitRepo(repo_path + '/.git')
        existing_revision = [rev.hash for rev in row_repo.revisions]
        log.info(f'repo_name: {row_repo.name}')
        log.info(f'repo_path: {row_repo.path}')
        log.info(f'repo_owner: {row_repo.owner}')
        log.info(f'size of existing_revision: {len(existing_revision)}')

        analysis_cache = {}
        for commit in gitrepo.get_all_commit_id():
            date = datetime.datetime.fromtimestamp(int(commit.commit_time)).strftime('%Y-%m-%d %H:%M:%S')
            commit_hash = str(commit.id)
            log.info(f'{date}, {commit_hash}')

            if commit_hash not in existing_revision:
                log.info('add revision')
                existing_revision.append(commit_hash)
                row_revision = Revision(hash=commit_hash,
                                        commit_time=datetime.datetime.fromtimestamp(int(commit.commit_time)))
                row_repo.revisions.append(row_revision)
                gitrepo.checkout_by(commit_hash)

                row_revision.slocs = []
                for f in self.source_scanner(repo_path, skip_path, suffix):
                    fcontent = ''
                    with open(f, 'rb') as fh:
                        fcontent = fh.read()

                    if f in analysis_cache and analysis_cache[f][1] == fcontent:
                        analysis = analysis_cache[f][0]
                        # log.info('Use cache in analysis: %s', f)
                    else:
                        analysis = pygount.source_analysis(f, group='pygount', encoding='automatic')
                        analysis_cache[f] = (analysis, fcontent)
                        log.info(f'Analysis: {f}')

                    row_revision.slocs.append(Sloc(language=analysis.language,
                                                   filename=f, source_line=analysis.code,
                                                   empty_line=analysis.empty))
            db_session.commit()

        log.info('End of scanning.')
Example #5
def analyze_directory(directory, include, exclude):
    """
    Analyze source code files from directory applying given include and
    exclude patterns.

    :param str directory: Directory to search for files to analyze.
    :param list include: List of include patterns.
    :param list exclude: List of exclude patterns.

    :return: A tuple of two dictionaries: the first maps each analyzed file
     to its detected language; the second maps each language detected in the
     directory to its total lines of code, documentation, empty and string.
    :rtype: tuple of dict
    """
    from pygount import source_analysis

    # Collect files
    collected = collect_files(directory, include, exclude)

    # Analyze files
    files = OrderedDict()
    sloc = OrderedDict()

    for relfname, absfname in joiner(directory, collected):
        # Perform analysis
        analysis = source_analysis(
            absfname, '', fallback_encoding='utf-8'
        )

        # Get language and register the file as analyzed
        lang = analysis.language.lower()
        files[relfname] = lang

        # Ignore unknown and binary files
        if lang in ['__unknown__', '__binary__']:
            continue

        # Grab counter and perform accumulation
        counter = sloc.setdefault(lang, OrderedDict())

        for count in ['code', 'documentation', 'empty', 'string']:
            counter[count] = counter.get(count, 0) + getattr(analysis, count)

    return files, sloc
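A sketch of how analyze_directory might be called, assuming collect_files and joiner are importable from the same package; the patterns are illustrative:

files, sloc = analyze_directory('src', include=['*.py'], exclude=['test_*'])
for lang, counts in sloc.items():
    print(lang, counts['code'], counts['documentation'],
          counts['empty'], counts['string'])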
Example #6
    def process_file(self, file, path):
        try:
            analysis = pygount.source_analysis(file,
                                               str(path),
                                               encoding='chardet')
            if analysis is not None and analysis.code > 0 and analysis.language is not None:
                self.analysis[file] = analysis
                if analysis.language in self.lang_files:
                    self.lang_files[analysis.language].append(file)
                else:
                    self.lang_files[analysis.language] = [file]
                self.process_files += 1
                print(analysis.language,
                      len(self.lang_files[analysis.language]),
                      "processed files:",
                      self.process_files,
                      end=" " * 80 + "\r")

        except Exception:
            self.log.warning("couldn't process file {}".format(file))
Example #7
def extract_LOC(file):
    analysis = pygount.source_analysis(file, 'pygount')
    try:
        return analysis.code, analysis.documentation
    except Exception:
        return 0, 0
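A minimal use of extract_LOC (the file name is hypothetical):

code_lines, doc_lines = extract_LOC('example.py')
print(code_lines, doc_lines)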
Example #8
def execute(root_dir):
    """
    Returns a dictionary containing the URL of a repository, number of lines, external libraries used , average nesting factor, percentage of code duplication, average number of parameters and average number of variables of the repository.
    
    Parameter:
    root_dir(path): Path to the local repository.
    Return:
    A dictionary containing the URL of a repository, number of lines, external libraries used , average nesting factor, percentage of code duplication, average number of parameters and average number of variables of the repository.
    
    """

    # Recursively collect all the .py files
    lengths = []
    libraries = []
    nesting_factors = []
    param_count = []
    total_var = []
    duplicate_for_the_repo = []
    average_nesting_factor = 0
    average_param = 0
    code_duplication = 0
    avg_var = 0

    k = root_dir.rsplit('-')
    n = k[0]
    m = k[-1]

    urls = [repo for repo in repo_list if n in repo and m in repo]
    if urls:
        url = urls[0]
    else:
        url = root_dir

    for filename in glob.iglob(root_dir + '/**/*.py', recursive=True):
        #filename=filename.replace(" ", "\\ ")
        filename = str_to_raw(filename)
        try:
            count = pygount.source_analysis(
                filename,
                'pygount')  # count the lines of code in the .py file
            lengths.append(count.code)
            library = imported_module(filename)
            for lib in library:
                libraries.append(lib)
            deg_list = nesting_factor(for_loop_position(filename))
            for deg in deg_list:
                nesting_factors.append(deg)

            for param in parameter_count(filename):
                param_count.append(param)
            for var in variable_count(filename):
                total_var.append(var)
            duplicate_for_the_repo.append(duplicated_line(filename))
        except Exception as e:
            print("type error: " + str(e))
            print(filename)

    if len(nesting_factors) != 0:
        average_nesting_factor = np.mean(nesting_factors)
    if param_count:
        average_param = np.mean(param_count)
    libraries = unique(libraries)
    repo_count = sum(lengths)
    if total_var:
        avg_var = np.mean(total_var)
    if repo_count and duplicate_for_the_repo:
        code_duplication = (sum(duplicate_for_the_repo) / repo_count) * 100

    return {
        'repository_url': url,
        'number of lines': repo_count,
        'libraries': libraries,
        'nesting factor': average_nesting_factor,
        'code duplication': code_duplication,
        'average parameters': average_param,
        'average variables': avg_var
    }
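A hypothetical invocation, assuming repo_list and the helper functions (str_to_raw, imported_module, nesting_factor, for_loop_position, parameter_count, variable_count, duplicated_line) are defined in the same module:

metrics = execute('/tmp/owner-repo-abc123')   # hypothetical clone directory
print(metrics['number of lines'], metrics['code duplication'])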
Example #9
    bigger_file = None
    listdirs = next(os.walk(os.path.join(RESULTS_FOLDER, folder)))[1]
    if not listdirs:
        shutil.rmtree(os.path.join(RESULTS_FOLDER, folder))
    else:
        for subfolder in listdirs:
            files = next(
                os.walk(os.path.join(RESULTS_FOLDER, folder, subfolder)))[2]
            if not files:
                shutil.rmtree(os.path.join(RESULTS_FOLDER, folder, subfolder))
            else:
                for file in files:
                    content = ""
                    invalid = False
                    n_lines = pygount.source_analysis(
                        os.path.join(RESULTS_FOLDER, folder, subfolder, file),
                        '').code
                    with open(
                            os.path.join(RESULTS_FOLDER, folder, subfolder,
                                         file), 'r') as f:
                        for line in f:
                            if is_invalid_line(line):
                                invalid = True
                                break
                            content += line
                            n_lines += 1

                    if invalid:
                        continue

                    try:
Example #10
def main():
    parser = argparse.ArgumentParser(
        description='''Summarize github stats for a user or organization.''')
    parser.add_argument('user', type=str, help='Github user or organization')
    parser.add_argument(
        '--include',
        type=str,
        help='''One or more repos to include, separated by commas.
                        Note that --exclude takes priority over --include.
                        ''')
    parser.add_argument(
        '--exclude',
        type=str,
        help='''One or more repos to exclude, separated by commas.
                        Note that --exclude takes priority over --include.
                        ''')
    parser.add_argument('-d',
                        '--debug',
                        action='store_true',
                        help='Show debug messages (default = False)')
    args = parser.parse_args()
    # "user" means user or organization
    user = args.user
    include = [] if args.include is None else args.include.split(',')
    exclude = [] if args.exclude is None else args.exclude.split(',')

    # Authenticate if GITHUB_USERNAME and GITHUB_PWD are set in the environment
    github_username = os.environ.get('GITHUB_USERNAME')
    github_pwd = os.environ.get('GITHUB_PWD')
    if github_username is not None and github_pwd is not None:
        g = Github(github_username, github_pwd)
    else:
        g = Github()

    repo_count = 0
    commit_count = 0
    loc_count = 0

    for repo in g.get_user(user).get_repos():
        repo_name = repo.name
        if ((len(exclude) == 0 or repo_name not in exclude)
                and (len(include) == 0 or repo_name in include)):
            repo_count += 1
            print(str(repo_count) + ') analyzing ' + repo_name + '...')
            for commit in repo.get_commits():
                commit_count += 1

            url_vars = {'user': user, 'repo': repo.name}
            url = 'https://github.com/{user}/{repo}/archive/master.zip'.format(
                **url_vars)

            r = requests.get(url, stream=True)
            if r.ok:
                z = zipfile.ZipFile(io.BytesIO(r.content))
                with tempfile.TemporaryDirectory() as tmpdirname:
                    z.extractall(tmpdirname)
                    scanner = pygount.analysis.SourceScanner([tmpdirname])
                    source_paths = list(scanner.source_paths())
                    for source_path in source_paths:
                        analysis = pygount.source_analysis(
                            source_path[0], repo.name)
                        loc_count += analysis.code

    print('Totals')
    print('************************')
    print('repos: ' + str(repo_count))
    print('commits: ' + str(commit_count))
    print('lines (LOC): ' + str(loc_count))

    return {
        'repos': repo_count,
        'commits': commit_count,
        'lines': loc_count,
    }
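The original snippet does not show an entry point; a conventional one would be:

if __name__ == '__main__':
    totals = main()  # prints per-repo progress and a summary, returns the totals dict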