def test_send_error_pass_email(self, _, error):
    """send_error must still print something when the e-mail address is the 'none' stub."""
    notifier = MailNotifier(_subject, "none")
    with patch('sys.stdout', new=StringIO()) as fake_out:
        notifier.send_error(error)
        printed = fake_out.getvalue().strip()
        self.assertGreater(len(printed), 0)
def test_send_notification_pass_email(self):
    """With the 'none' e-mail stub, send_notification falls back to printing the message verbatim."""
    notifier = MailNotifier(_subject, "none")
    with patch('sys.stdout', new=StringIO()) as fake_out:
        notifier.send_notification(_info)
        self.assertEqual(fake_out.getvalue().strip(), _info)
def test_send_notification_with_error(self, _, email):
    """When mailing fails, the message is still printed, followed by extra error output."""
    notifier = MailNotifier(_subject, email)
    with patch('sys.stdout', new=StringIO()) as fake_out:
        notifier.send_notification(_info)
        captured = fake_out.getvalue().strip()
        # The message itself comes first, then something more (the error report).
        self.assertTrue(captured.startswith(_info))
        self.assertGreater(len(captured), len(_info))
def __init__(self, extensions, slice, target_directory, email_notify, arg_modes, only_master):
    """Set up the dataset builder: remember configuration, create the working
    directory layout and a mail notifier.

    Args:
        extensions: mapping of file extension -> per-language folder name.
        slice: max number of siva archives to process per run.
        target_directory: root folder the finished dataset is moved into.
        email_notify: address passed to MailNotifier for progress reports.
        arg_modes: names of optional stages (e.g. 'paths', 'files',
            'histories') enabled as boolean attributes on self.
        only_master: if True, only the master branch of each repo is taken.
    """
    self.extensions = extensions
    self.notifier = MailNotifier(subject, email_notify)
    self.target_directory = target_directory
    self.slice = slice
    self.only_master = only_master
    # Bug fix: the original used exec(f"self.{mode} = {(mode in arg_modes)}"),
    # which is both unsafe (arbitrary attribute-name injection) and redundant —
    # `mode in arg_modes` is always True inside this loop. setattr expresses
    # the intent directly and identically.
    for mode in arg_modes:
        setattr(self, mode, True)
    mkdir(temp_folder + '/languages')
    mkdir(temp_folder + '/repositories')
    mkdir(target_directory)
    # One sub-folder per language under the temporary 'languages' tree.
    for key in extensions:
        mkdir(os.path.join(temp_folder + '/languages', extensions[key]))
def dump_errors(email_notify, skipped_filename):
    """Append every repository that failed processing to *skipped_filename*
    and mail a one-line summary of how many were skipped."""
    with Connector() as connector:
        failed_repos = connector.get_repositories_with_errors()
        writeLinesFile(skipped_filename, failed_repos, mode='a', appendWithNewLine=True)
        summary = f"Skipped {len(failed_repos)} repositories: {failed_repos}"
        MailNotifier(subject, email_notify).send_notification(summary)
def prepare_list(repo_list: str, rep_count: int, email_notify: str):
    """Move the first *rep_count* repositories from *repo_list* into the
    temporary work list, and rewrite *repo_list* with the remainder.

    Args:
        repo_list: path of the file holding the full repository list.
        rep_count: how many repositories to take for this run.
        email_notify: address for the progress notification.
    """
    content = readLinesFile(repo_list)
    # Compute the leftover slice once instead of re-slicing twice below.
    remaining = content[rep_count:]
    writeLinesFile(temp_repo_list, content[:rep_count], appendWithNewLine=True)
    # Idiom fix: len(x) instead of x.__len__(); the rendered message is unchanged.
    MailNotifier(subject, email_notify).send_notification(
        f"Was repos: {len(content)}. "
        f"Taking only {rep_count} reps. "
        f"Become repos: {len(remaining)}.")
    writeLinesFile(repo_list, remaining, appendWithNewLine=True)
def calibrate(email, target_directory, only_master):
    """Scan the dataset for inconsistencies (corrupted files, files missing
    from paths.json, bad branches), optionally delete the corrupted files,
    and log + mail a report for each error category."""
    mailer = MailNotifier(subject, email)
    repos_root = os.path.join(target_directory, 'repositories')
    langs_root = os.path.join(target_directory, 'languages')
    all_repos = _get_all_repos(repos_root)
    files = _log_files(all_repos, repos_root, langs_root)
    paths = _log_paths(all_repos, repos_root, langs_root)
    branches = _check_for_branches(repos_root, only_master)
    # Interactive cleanup: only offered when something is actually missing.
    if paths and input(
            f"Remove {len(paths)} files not listed in paths.json? (Y/n) "
    ).lower() in ["yes", 'y']:
        for damaged in tqdm(paths, desc="Remove corrupted files"):
            os.remove(damaged)
    # One timestamp shared by all three report files of this run.
    timestamp = str(time.time_ns())
    if files:
        writeLinesFile(f"{calibrate_folder}/calibration-files-{timestamp}.txt", files)
        mailer.send_notification(
            f"Total amount of errors with files: {len(files)}")
    if paths:
        writeLinesFile(f"{calibrate_folder}/calibration-paths-{timestamp}.txt", paths)
        mailer.send_notification(
            f"Total amount of errors with paths: {len(paths)}")
    if branches:
        writeLinesFile(
            f"{calibrate_folder}/calibration-branches-{timestamp}.txt", branches)
        mailer.send_notification(
            f"Total amount of errors with branches: {len(branches)}")
def normalize(email: str, target_directory: str, convert_modes: list):
    """Run the selected source converters ('i' = imports, 'c' = comments) over
    every .py file under *target_directory*, logging per-file failures and
    files whose result no longer parses.

    Args:
        email: address for progress/error notifications.
        target_directory: dataset root to scan for .py files.
        convert_modes: subset of `convert_state` naming the converters to run.
    """
    notifier = MailNotifier(subject, email)
    if len([item for item in convert_modes if item not in convert_state]) != 0:
        # NOTE: original behavior preserved — this reports the error but does
        # not abort the run.
        notifier.send_error(
            ValueError("Wrong convert state, must be one of " + str(convert_state)))
    files = select_all_files_with_extension(target_directory, '.py')
    filename = f"err_normalize-{time.time()}.txt"
    # Bug fix: the error-log handle was never closed; manage it with `with`.
    with open(filename, "a") as err_file:
        # Bug fix: the notification was an f-string with no placeholder.
        notifier.send_notification(f"Errors will be logged to {filename}")
        # Bug fix: `files` is already a list of file paths
        # (select_all_files_with_extension), so os.listdir(files) would crash.
        for file in tqdm(files):
            try:
                if 'i' in convert_modes:
                    import_con.convert_file(file)
                if 'c' in convert_modes:
                    comments_con.convert_file(file)
            except KeyboardInterrupt:
                print('Aborted with KeyboardInterrupt')
                break
            except Exception:
                # Best-effort: record the failure and keep going.
                err_file.write("Problem while converting:" + file + '\n')
                err_file.flush()
            try:
                # Sanity-check that the converted file still parses.
                with open(file) as converted:
                    ast.parse(converted.read())
            except KeyboardInterrupt:
                print('Aborted with KeyboardInterrupt')
                break
            except Exception:
                err_file.write("Damaged:" + file + '\n')
                err_file.flush()
    # Bug fix: the original reopened the log in append ('a') mode, which is
    # write-only — readlines() would raise. Read mode gives the real count.
    with open(filename) as err_log:
        damaged_count = len(err_log.readlines())
    notifier.send_notification(
        f"Dataset normalised with {damaged_count} damaged files")
def test_mail_notifier_with_None_subject(self):
    """Constructing a MailNotifier without a subject must raise TypeError."""
    with self.assertRaises(TypeError):
        MailNotifier(None, "none")
class Dataset:
    """Builds a source-code dataset from siva archives: unpacks each archive,
    copies files into per-language folders and reports progress by e-mail.

    NOTE(review): relies on module-level globals defined elsewhere in this
    file (subject, temp_folder, siva_folder, MailNotifier, mkdir, unpack_siva,
    head_branches, ...) — confirm their contracts before changing call order.
    """

    def __init__(self, extensions, slice, target_directory, email_notify, arg_modes, only_master):
        # extensions: mapping of file extension -> per-language folder name.
        # slice: max number of siva archives to process per run.
        # arg_modes: names of optional stages ('paths', 'files', 'histories', ...)
        #            enabled as boolean attributes on self.
        # only_master: if True, only each repository's master branch is taken.
        self.extensions = extensions
        self.notifier = MailNotifier(subject, email_notify)
        self.target_directory = target_directory
        self.slice = slice
        self.only_master = only_master
        # NOTE(review): `mode in arg_modes` is always True inside this loop,
        # so every listed mode becomes `self.<mode> = True`; setattr would be
        # safer than exec here.
        for mode in arg_modes:
            exec(f"self.{mode} = {(mode in arg_modes)}")
        # Working layout: temp_folder/{languages,repositories} plus the final
        # target directory; one sub-folder per configured language.
        mkdir(temp_folder + '/languages')
        mkdir(temp_folder + '/repositories')
        mkdir(target_directory)
        for key in extensions:
            mkdir(os.path.join(temp_folder + '/languages', extensions[key]))

    # TODO Temporary out of work
    def pga(self):
        """Unpack siva archives previously fetched by the `pga` CLI from the
        local 'repos' folder; the listing/download step is currently disabled
        (see the commented code below)."""
        # TODO add creation of siva list
        # do_bash_command("pga list -l python -f json > list_repos.json")
        # with open("list_repos.json", "r") as dsc:
        #     content = list(map(json.loads, dsc.readlines()))
        #
        # out_siva = [item for x in content[:self.slice] for item in x['sivaFilenames']]
        #
        # writeLinesFile(list_siva_temp, out_siva, appendWithNewLine=True)
        #
        # os.system(f"cat {dataDir + list_siva_temp} | pga get -i -o repos")
        files_total, repos_total = self.unpack_and_select_files(select_all_files_with_extension('repos', '.siva'))
        if os.path.exists("repos"):
            shutil.rmtree("repos")
        move_all_files_from_temp(self.target_directory, temp_folder)
        self.notifier.send_notification(
            # f"Left sivas: {len(content[self.slice:])}"
            f" Total files: {files_total} and add repos: {repos_total}")

    def borges(self, sivas_folder):
        """Process up to self.slice siva archives from *sivas_folder* and move
        the selected files into the target directory, mailing start/finish
        notifications."""
        full_siva = select_all_files_with_extension(sivas_folder, '.siva')
        out_siva = full_siva[:self.slice]
        if len(out_siva) == 0:
            print(f"No files in {sivas_folder} folder")
            return
        self.notifier.send_notification(f"Started download. "
                                        f"Total siva count: {len(full_siva)}. "
                                        f"Taking only {len(out_siva)} sivas")
        files_total, repos_total = self.unpack_and_select_files(out_siva)
        move_all_files_from_temp(self.target_directory, temp_folder)
        # "Finnished" typo kept deliberately — it is runtime output.
        self.notifier.send_notification(f"Finnished. \nAdd files: {files_total} and sivas: {repos_total}\n")

    def copy_file(self, file, file_name, destination, repository, files_data: dict, files_paths: dict):
        """Copy one file into the sorted per-language tree, de-duplicating
        names with a nanosecond timestamp suffix.

        Returns 1 if the file's extension is configured (file copied),
        0 otherwise. Updates *files_data* (per-language counters) and
        *files_paths* (new name -> original path) in place.
        """
        base, ext = os.path.splitext(file)
        if ext in self.extensions:
            # Timestamp suffix makes the destination name effectively unique.
            new_name = (base + '_' + str(time.time_ns()) + ext)
            sortedDestination = self.extensions[ext]
            new_name = os.path.join(destination, sortedDestination, repository, new_name)
            mkdir(os.path.join(destination, sortedDestination, repository))
            try:
                shutil.copy(file_name, new_name)
            except OSError:
                # Fallback (e.g. name too long): drop the original basename
                # and use the timestamp alone.
                tqdm.write(new_name)
                new_name = (str(time.time_ns()) + ext)
                new_name = os.path.join(destination, sortedDestination, repository, new_name)
                shutil.copy(file_name, new_name)
            if self.paths:
                # NOTE(review): '/' splitting assumes POSIX paths — confirm.
                files_paths[new_name.split('/')[-1]] = file_name.split('/', 1)[1]
            # NOTE(review): the else branch resets the counter to 1 even when
            # self.files is False — looks suspicious; verify intent.
            if self.files and sortedDestination in files_data:
                files_data[sortedDestination] += 1
            else:
                files_data[sortedDestination] = 1
            return 1
        else:
            return 0

    def copy_files_from_dir(self, directory, repository):
        """Walk *directory* and copy every regular file with a configured
        extension into the temporary per-language tree for *repository*;
        write paths/files metadata and repository history when those modes
        are enabled. Returns the number of files copied."""
        files_count = 0
        files_data = {}
        files_paths = {}
        for root, dirs, files in os.walk(directory):
            for file in files:
                file_name = os.path.join(root, file)
                if os.path.isfile(file_name):
                    files_count += self.copy_file(file, file_name, temp_folder + '/languages', repository, files_data, files_paths)
        if self.paths:
            writeJsonFile(f"{temp_folder}/repositories/{repository}/paths.json", files_paths, dataFolder=False)
        if self.files:
            writeJsonFile(f"{temp_folder}/repositories/{repository}/files.json", files_data, dataFolder=False)
        if self.histories:
            write_repository_history(repository)
        return files_count

    def unpack_and_select_files(self, out_siva):
        """Unpack each siva archive in *out_siva*, copy the files of every
        head branch (or just master when self.only_master), then delete the
        unpacked folder and the archive. Returns (total_files, repos_num)."""
        total_files = 0
        repos_num = 0
        for file in tqdm(out_siva):
            siva_name = unpack_siva(file)
            new_files_count = 0
            # copying files
            for branch in head_branches():
                checkout_branch(branch)
                repo = repository_by_branch(branch)
                # Branch suffix distinguishes checkouts of the same repo.
                if self.only_master:
                    repo += "/master"
                else:
                    repo += '/' + siva_name
                # NOTE(review): this None check comes after `repo +=`, which
                # would already have raised if repo were None — dead guard?
                if repo is None:
                    continue
                # Skip repos whose master copy already exists in the target.
                # NOTE(review): string concatenation inside os.path.join may
                # be missing a separator — confirm target_directory format.
                if self.only_master and os.path.exists(os.path.join(self.target_directory + repo)):
                    continue
                new_files_count = self.copy_files_from_dir(siva_folder, repo)
                repos_num += 1
                total_files += new_files_count
            # deleting the directory with siva files we have already used
            shutil.rmtree(siva_folder)
            os.remove(file)
        return total_files, repos_num
def __init__(self, email, target_directory):
    """Remember the dataset root and set up mail-based progress reporting."""
    self.target_directory = target_directory
    self.notifier = MailNotifier(subject, email)
class Statistic:
    """Gathers dataset statistics (per-language counts, per-extension counts,
    repository totals), writes them to timestamped report files and mails
    a summary for each."""

    def __init__(self, email, target_directory):
        """Remember the dataset root and set up mail-based reporting."""
        self.notifier = MailNotifier(subject, email)
        self.target_directory = target_directory

    def stat_languages(self, languages_file):
        """Count files per language folder listed in *languages_file* and
        write a CSV report."""
        # Fixed header typo: "number or files" -> "number of files".
        logLang = ["Folder, number of files"]
        languages = readLinesFile(languages_file)
        for language in tqdm(languages):
            directory = os.path.join(self.target_directory, 'languages',
                                     language)
            count = count_all_files(directory, None)
            logLang.append(f"{language}, {count}\n")
        # Bug fix: the list always starts with the header row, so the old
        # `> 0` guard was dead; `> 1` means "at least one data row".
        if len(logLang) > 1:
            writeLinesFile(
                f"{statistic_folder}/stat-languages-{str(time.time_ns())}.csv",
                logLang)
            self.notifier.send_notification(logLang)

    def stat_extensions(self, extensions):
        """Count files per extension->folder mapping and write a CSV report."""
        logExt = ["Folder, number of files"]
        for ext, path in tqdm(extensions.items()):
            directory = os.path.join(self.target_directory, 'languages', path)
            count = count_all_files(directory, ext)
            logExt.append(f"{path}, {count}\n")
        # Same header-row fix as stat_languages: report only with data rows.
        if len(logExt) > 1:
            writeLinesFile(
                f"{statistic_folder}/statistic-extensions-{str(time.time_ns())}.csv",
                logExt)
            self.notifier.send_notification(logExt)

    def stat_repositories(self):
        """List every user/repository as a GitHub URL and mail the total
        number of checked-out branches."""
        logRepos = []
        total = 0
        for user in tqdm(os.listdir(self.target_directory)):
            for rep in os.listdir(os.path.join(self.target_directory, user)):
                url = f"https://github.com/{user}/{rep}/\n"
                # Each sub-folder of a repo is one branch checkout.
                total += len(
                    os.listdir(os.path.join(self.target_directory, user, rep)))
                logRepos.append(url)
        if len(logRepos) > 0:
            writeLinesFile(
                f"{statistic_folder}/statistic-repositories-{str(time.time_ns())}.txt",
                logRepos)
            self.notifier.send_notification(
                f"Total amount of repositories with different branches: {total}"
            )

    def gather_statistic(self, modes, languages_file, exts):
        """Dispatch to the statistic collectors named in *modes*."""
        if 'languages' in modes:
            self.stat_languages(languages_file)
        if 'extensions' in modes:
            self.stat_extensions(exts)
        if 'repositories' in modes:
            self.stat_repositories()