for x in range(0,len(files)): if x > 1: print("completed: ", round((x * 100) / len(files),1), "% ", end='\r') file_name = os.path.join("./processed", files[x]) try: df = pd.read_csv(filepath_or_buffer = file_name, index_col=0, dtype = str, na_filter=False) # df = df.drop(["id_df"], axis=1) n_starting_triplets += len(df) cleaner = Cleaner(df, t5_tokenizer, stopwords, english_cache) cleaner.remove_non_marked() cleaner.clean_df() # final cleaning : remove methods which has more than one review cleaner.remove_multiple_method_comments() n_irrelevant_comments += cleaner.irrelevant_comments n_not_marked += cleaner.not_marked n_non_latin += cleaner.non_latin n_before_equals_after += cleaner.before_equals_after n_non_english += cleaner.non_english n_too_long += cleaner.too_long n_too_long_after += cleaner.too_long_after n_multiple_rev += cleaner.multiple_reviews n_comment_empty += cleaner.comment_empty n_code_before_empty += cleaner.code_before_empty n_code_before_marked_empty += cleaner.code_before_marked_empty n_code_after_empty += cleaner.code_after_empty