コード例 #1
0
    def test_send_error_pass_email(self, _, error):
        """send_error must print non-empty text when the email is "none"."""
        mailer = MailNotifier(_subject, "none")

        with patch('sys.stdout', new=StringIO()) as captured:
            mailer.send_error(error)
            printed = captured.getvalue().strip()

            # Only the presence of output is checked, not its content.
            self.assertGreater(len(printed), 0)
コード例 #2
0
    def test_send_notification_pass_email(self):
        """send_notification must print ``_info`` when the email is "none".

        Surrounding whitespace in the captured output is ignored.
        """
        mailer = MailNotifier(_subject, "none")

        with patch('sys.stdout', new=StringIO()) as captured:
            mailer.send_notification(_info)
            printed = captured.getvalue().strip()

            self.assertEqual(printed, _info)
コード例 #3
0
    def test_send_notification_with_error(self, _, email):
        """Output must begin with ``_info`` and contain extra text after it."""
        mailer = MailNotifier(_subject, email)
        with patch('sys.stdout', new=StringIO()) as captured:
            mailer.send_notification(_info)
            printed = captured.getvalue().strip()

            begins_with_info = printed.startswith(_info)
            self.assertTrue(begins_with_info)
            # Anything longer than the message itself means more was printed.
            self.assertGreater(len(printed), len(_info))
コード例 #4
0
    def __init__(self, extensions, slice, target_directory, email_notify, arg_modes, only_master):
        """Store the run configuration and create the working directories.

        Args:
            extensions: mapping whose values are used as sub-folder names
                under ``<temp_folder>/languages``.
            slice: stored as ``self.slice``.
            target_directory: directory that is created and stored.
            email_notify: passed to ``MailNotifier`` together with ``subject``.
            arg_modes: iterable of attribute names; each one is set to
                ``True`` on the instance.
            only_master: stored as ``self.only_master``.
        """
        self.extensions = extensions
        self.notifier = MailNotifier(subject, email_notify)
        self.target_directory = target_directory
        self.slice = slice
        self.only_master = only_master

        # Plain attribute assignment instead of exec(): the result is the
        # same for ordinary attribute names, but a mode string can no longer
        # be executed as code.
        for mode in arg_modes:
            setattr(self, mode, True)

        mkdir(temp_folder + '/languages')
        mkdir(temp_folder + '/repositories')
        mkdir(target_directory)

        for key in extensions:
            mkdir(os.path.join(temp_folder + '/languages', extensions[key]))
コード例 #5
0
def dump_errors(email_notify, skipped_filename):
    """Append the repositories reported with errors to a file and notify.

    The list comes from ``Connector.get_repositories_with_errors()``; it is
    appended to ``skipped_filename`` and summarised in a notification.
    """
    with Connector() as db:
        failed_repos = db.get_repositories_with_errors()

        writeLinesFile(skipped_filename, failed_repos, mode='a', appendWithNewLine=True)

        mailer = MailNotifier(subject, email_notify)
        summary = f"Skipped {len(failed_repos)} repositories: {failed_repos}"
        mailer.send_notification(summary)
コード例 #6
0
def prepare_list(repo_list: str, rep_count: int, email_notify: str):
    """Split the repository list into a working part and a remainder.

    The first ``rep_count`` lines read from ``repo_list`` are written to
    ``temp_repo_list``; the remaining lines are written back to
    ``repo_list``. A notification with the three counts is sent in between.
    """
    all_lines = readLinesFile(repo_list)
    taken = all_lines[:rep_count]
    remaining = all_lines[rep_count:]

    writeLinesFile(temp_repo_list, taken, appendWithNewLine=True)

    mailer = MailNotifier(subject, email_notify)
    mailer.send_notification(
        f"Was repos: {len(all_lines)}. "
        f"Taking only {rep_count} reps. "
        f"Become repos: {len(remaining)}.")

    writeLinesFile(repo_list, remaining, appendWithNewLine=True)
コード例 #7
0
def calibrate(email, target_directory, only_master):
    """Run the consistency checks on a dataset directory and report results.

    The checks are delegated to ``_log_files``, ``_log_paths`` and
    ``_check_for_branches``. When the paths check returns entries, the user
    is asked whether those files should be removed. Every non-empty result
    is written to its own timestamped file under ``calibrate_folder`` and
    its size is sent as a notification.
    """
    notifier = MailNotifier(subject, email)
    root_reps = os.path.join(target_directory, 'repositories')
    root_langs = os.path.join(target_directory, 'languages')

    repos = _get_all_repos(root_reps)
    files = _log_files(repos, root_reps, root_langs)
    paths = _log_paths(repos, root_reps, root_langs)
    branches = _check_for_branches(root_reps, only_master)

    if len(paths) > 0:
        prompt = f"Remove {len(paths)} files not listed in paths.json? (Y/n) "
        reply = input(prompt).lower()
        # Only an explicit "yes"/"y" triggers deletion.
        if reply in ("yes", "y"):
            for stale in tqdm(paths, desc="Remove corrupted files"):
                os.remove(stale)

    timestamp = str(time.time_ns())

    # Same report for each check, in the order files, paths, branches.
    findings = (("files", files), ("paths", paths), ("branches", branches))
    for label, entries in findings:
        if len(entries) > 0:
            writeLinesFile(
                f"{calibrate_folder}/calibration-{label}-{timestamp}.txt",
                entries)
            notifier.send_notification(
                f"Total amount of errors with {label}: {len(entries)}")
コード例 #8
0
def normalize(email: str, target_directory: str, convert_modes: list):
    """Run the selected converters over the ``.py`` files and log failures.

    Args:
        email: passed to ``MailNotifier`` together with ``subject``.
        target_directory: directory searched for ``.py`` files via
            ``select_all_files_with_extension``.
        convert_modes: collection of mode codes; ``'i'`` runs
            ``import_con.convert_file`` and ``'c'`` runs
            ``comments_con.convert_file``. Codes not found in
            ``convert_state`` are reported through ``send_error``.

    Every file that fails conversion, or that ``ast.parse`` rejects
    afterwards, gets one line in ``err_normalize-<time>.txt``. The number of
    lines in that log is sent in the final notification. A
    ``KeyboardInterrupt`` stops the loop early.
    """
    notifier = MailNotifier(subject, email)
    unknown_modes = [item for item in convert_modes if item not in convert_state]
    if unknown_modes:
        # NOTE(review): processing continues after reporting, as before;
        # confirm whether send_error is expected to abort.
        notifier.send_error(
            ValueError("Wrong convert state, must be one of " +
                       str(convert_state)))

    files = select_all_files_with_extension(target_directory, '.py')
    filename = f"err_normalize-{time.time()}.txt"

    # The context manager closes the log even if a converter raises
    # something that is not handled below.
    with open(filename, "a") as err_file:
        notifier.send_notification(f"Errors will be logged to {filename}")

        # `files` is iterated directly: the helper's result is used as a
        # list of paths elsewhere, and os.listdir() does not accept a list.
        for file in tqdm(files):
            try:
                if 'i' in convert_modes:
                    import_con.convert_file(file)
                if 'c' in convert_modes:
                    comments_con.convert_file(file)
            except KeyboardInterrupt:
                print('Aborted with KeyboardInterrupt')
                break
            except Exception:
                err_file.write("Problem while converting:" + file + '\n')
                err_file.flush()
            try:
                # Re-parse the result to detect files left syntactically broken.
                with open(file) as source:
                    ast.parse(source.read())
            except KeyboardInterrupt:
                print('Aborted with KeyboardInterrupt')
                break
            except Exception:
                err_file.write("Damaged:" + file + '\n')
                err_file.flush()

    # Re-open for reading: a handle opened in append mode cannot be read.
    with open(filename) as err_log:
        logged_lines = len(err_log.readlines())

    notifier.send_notification(
        f"Dataset normalised with {logged_lines} damaged files"
    )
コード例 #9
0
 def test_mail_notifier_with_None_subject(self):
     """Constructing MailNotifier with a ``None`` subject must raise TypeError."""
     with self.assertRaises(TypeError):
         MailNotifier(None, "none")
コード例 #10
0
class Dataset:
    """Unpacks siva archives and copies files with known extensions.

    Files are first collected under ``temp_folder`` (``languages/`` and
    ``repositories/`` sub-folders) and then moved to ``target_directory``
    with ``move_all_files_from_temp``.
    """

    def __init__(self, extensions, slice, target_directory, email_notify, arg_modes, only_master):
        """Store the run configuration and create the working directories.

        Args:
            extensions: mapping from file extension to the sub-folder name
                used under ``languages``.
            slice: upper bound on how many siva files ``borges`` takes.
            target_directory: directory that is created and stored.
            email_notify: passed to ``MailNotifier`` together with ``subject``.
            arg_modes: iterable of attribute names; each one is set to
                ``True`` on the instance. ``paths``, ``files`` and
                ``histories`` are the ones read by this class.
            only_master: stored as ``self.only_master``.
        """
        self.extensions = extensions
        self.notifier = MailNotifier(subject, email_notify)
        self.target_directory = target_directory
        self.slice = slice
        self.only_master = only_master

        # These flags are read unconditionally by the copy methods, so they
        # need a value even when the mode was not requested.
        self.paths = False
        self.files = False
        self.histories = False

        # Plain attribute assignment instead of exec(): the result is the
        # same for ordinary attribute names, but a mode string can no longer
        # be executed as code.
        for mode in arg_modes:
            setattr(self, mode, True)

        mkdir(temp_folder + '/languages')
        mkdir(temp_folder + '/repositories')
        mkdir(target_directory)

        for key in extensions:
            mkdir(os.path.join(temp_folder + '/languages', extensions[key]))

    # TODO Temporary out of work
    def pga(self):
        """Process the ``.siva`` files found in ``repos`` and report totals.

        The download step is commented out; only files already present in
        the ``repos`` folder are handled, and that folder is removed after.
        """
        # TODO add creation of siva list
        # do_bash_command("pga list -l python -f json > list_repos.json")
        # with open("list_repos.json", "r") as dsc:
        #     content = list(map(json.loads, dsc.readlines()))
        #
        # out_siva = [item for x in content[:self.slice] for item in x['sivaFilenames']]
        #
        # writeLinesFile(list_siva_temp, out_siva, appendWithNewLine=True)
        #
        # os.system(f"cat {dataDir + list_siva_temp} | pga get -i -o repos")

        files_total, repos_total = self.unpack_and_select_files(select_all_files_with_extension('repos', '.siva'))

        if os.path.exists("repos"):
            shutil.rmtree("repos")

        move_all_files_from_temp(self.target_directory, temp_folder)

        self.notifier.send_notification(
            # f"Left sivas: {len(content[self.slice:])}"
            f" Total files: {files_total} and add repos: {repos_total}")

    def borges(self, sivas_folder):
        """Process up to ``self.slice`` siva files from ``sivas_folder``.

        Prints a message and returns when no siva file is found; otherwise
        sends a notification before and after processing.
        """
        full_siva = select_all_files_with_extension(sivas_folder, '.siva')

        out_siva = full_siva[:self.slice]

        if len(out_siva) == 0:
            print(f"No files in {sivas_folder} folder")
            return

        self.notifier.send_notification(f"Started download. "
                                        f"Total siva count: {len(full_siva)}. "
                                        f"Taking only {len(out_siva)} sivas")

        files_total, repos_total = self.unpack_and_select_files(out_siva)
        move_all_files_from_temp(self.target_directory, temp_folder)

        self.notifier.send_notification(f"Finnished. Add files: {files_total} and sivas: {repos_total}\n")

    def copy_file(self, file, file_name, destination, repository, files_data: dict, files_paths: dict):
        """Copy one file into ``destination`` under a unique name.

        The copy is placed in ``<destination>/<language folder>/<repository>``
        and gets a nanosecond timestamp in its name so identical file names
        do not collide.

        Args:
            file: base name of the file; its extension selects the folder.
            file_name: path of the file to copy.
            destination: root folder for the copies.
            repository: sub-folder for this repository.
            files_data: updated in place with a counter per language folder.
            files_paths: updated in place (when ``self.paths`` is set) with
                new name -> original path without its first component.

        Returns:
            1 when the file was copied, 0 when its extension is not in
            ``self.extensions``.
        """
        base, ext = os.path.splitext(file)
        if ext in self.extensions:
            new_name = (base + '_' + str(time.time_ns()) + ext)
            sortedDestination = self.extensions[ext]
            new_name = os.path.join(destination, sortedDestination, repository, new_name)

            mkdir(os.path.join(destination, sortedDestination, repository))
            try:
                shutil.copy(file_name, new_name)
            except OSError:
                # Retry with a name made only of the timestamp; presumably
                # this covers names the file system rejects -- TODO confirm.
                tqdm.write(new_name)
                new_name = (str(time.time_ns()) + ext)
                new_name = os.path.join(destination, sortedDestination, repository, new_name)
                shutil.copy(file_name, new_name)

            if self.paths:
                files_paths[new_name.split('/')[-1]] = file_name.split('/', 1)[1]

            if self.files and sortedDestination in files_data:
                files_data[sortedDestination] += 1
            else:
                files_data[sortedDestination] = 1
            return 1
        else:
            return 0

    def copy_files_from_dir(self, directory, repository):
        """Copy every file with a known extension found under ``directory``.

        Walks ``directory`` recursively and hands each regular file to
        ``copy_file``. Depending on the ``paths``, ``files`` and
        ``histories`` flags, also writes ``paths.json`` / ``files.json`` for
        the repository and calls ``write_repository_history``.

        Returns:
            The number of files copied.
        """
        files_count = 0
        files_data = {}
        files_paths = {}
        for root, dirs, files in os.walk(directory):
            for file in files:
                file_name = os.path.join(root, file)
                if os.path.isfile(file_name):
                    files_count += self.copy_file(file, file_name, temp_folder + '/languages', repository,
                                                  files_data, files_paths)

        if self.paths:
            writeJsonFile(f"{temp_folder}/repositories/{repository}/paths.json", files_paths, dataFolder=False)
        if self.files:
            writeJsonFile(f"{temp_folder}/repositories/{repository}/files.json", files_data, dataFolder=False)
        if self.histories:
            write_repository_history(repository)

        return files_count

    def unpack_and_select_files(self, out_siva):
        """Unpack each siva file and copy the files of its head branches.

        Args:
            out_siva: iterable of siva file paths; every file is deleted
                after it has been processed.

        Returns:
            Tuple ``(total_files, repos_num)``: files copied and branches
            processed.
        """
        total_files = 0
        repos_num = 0
        for file in tqdm(out_siva):
            siva_name = unpack_siva(file)
            new_files_count = 0

            # copying files
            for branch in head_branches():
                checkout_branch(branch)
                repo = repository_by_branch(branch)
                # Must be tested before the suffix is appended; afterwards
                # the value can no longer be None.
                if repo is None:
                    continue
                if self.only_master:
                    repo += "/master"
                else:
                    repo += '/' + siva_name
                # NOTE(review): the path is built by plain concatenation,
                # without a separator -- confirm this matches the layout.
                if self.only_master and os.path.exists(os.path.join(self.target_directory + repo)):
                    continue
                # Accumulate so every copied branch counts, not just the last.
                new_files_count += self.copy_files_from_dir(siva_folder, repo)
                repos_num += 1
            total_files += new_files_count

            # deleting the directory with siva files we have already used
            shutil.rmtree(siva_folder)
            os.remove(file)
        return total_files, repos_num
        return total_files, repos_num
コード例 #11
0
 def __init__(self, email, target_directory):
     """Store ``target_directory`` and a MailNotifier built from ``subject`` and ``email``."""
     self.target_directory = target_directory
     self.notifier = MailNotifier(subject, email)
コード例 #12
0
class Statistic:
    """Counts files and repositories under a target directory.

    Each report is written to a timestamped file in ``statistic_folder`` and
    sent through the notifier.
    """

    def __init__(self, email, target_directory):
        """Store ``target_directory`` and build the notifier for ``email``."""
        self.notifier = MailNotifier(subject, email)
        self.target_directory = target_directory

    def stat_languages(self, languages_file):
        """Count the files in each language folder listed in ``languages_file``.

        Writes one CSV row per language and sends the rows as a notification.
        """
        # The header ends with a newline like every data row, so it cannot
        # run into the first row when the lines are written out.
        logLang = ["Folder, number or files\n"]
        languages = readLinesFile(languages_file)

        for language in tqdm(languages):
            directory = os.path.join(self.target_directory, 'languages',
                                     language)
            count = count_all_files(directory, None)
            logLang.append(f"{language}, {count}\n")

        # Always true because of the header row; kept so a report is still
        # produced when no language is listed.
        if len(logLang) > 0:
            writeLinesFile(
                f"{statistic_folder}/stat-languages-{str(time.time_ns())}.csv",
                logLang)
            self.notifier.send_notification(logLang)

    def stat_extensions(self, extensions):
        """Count the files with each extension inside its language folder.

        Args:
            extensions: mapping from extension to folder name under
                ``languages``.
        """
        # Header carries its own newline, matching the data rows.
        logExt = ["Folder, number or files\n"]

        for ext, path in tqdm(extensions.items()):
            directory = os.path.join(self.target_directory, 'languages', path)
            count = count_all_files(directory, ext)
            logExt.append(f"{path}, {count}\n")

        # Always true because of the header row.
        if len(logExt) > 0:
            writeLinesFile(
                f"{statistic_folder}/statistic-extensions-{str(time.time_ns())}.csv",
                logExt)
            self.notifier.send_notification(logExt)

    def stat_repositories(self):
        """List the repositories found two levels below the target directory.

        The first level is used as the user name and the second as the
        repository name of a GitHub URL; the entries inside each repository
        folder are summed into the reported total.
        """
        logRepos = []
        total = 0

        for user in tqdm(os.listdir(self.target_directory)):
            for rep in os.listdir(os.path.join(self.target_directory, user)):
                url = f"https://github.com/{user}/{rep}/\n"
                total += len(
                    os.listdir(os.path.join(self.target_directory, user, rep)))
                logRepos.append(url)

        if len(logRepos) > 0:
            writeLinesFile(
                f"{statistic_folder}/statistic-repositories-{str(time.time_ns())}.txt",
                logRepos)
            self.notifier.send_notification(
                f"Total amount of repositories with different branches: {total}"
            )

    def gather_statistic(self, modes, languages_file, exts):
        """Run the reports named in ``modes``.

        Recognised names are ``'languages'``, ``'extensions'`` and
        ``'repositories'``; anything else is ignored.
        """
        if 'languages' in modes:
            self.stat_languages(languages_file)
        if 'extensions' in modes:
            self.stat_extensions(exts)
        if 'repositories' in modes:
            self.stat_repositories()