def clean_directory(self, **kwargs):
    """Prune undersized pickles and optionally export each document's link.

    Flags are detected by key presence only; their values are ignored.

    Args:
        **kwargs: Recognised keys:
            clear_small: delete every ``<id>.pkl`` in ``self.directory``
                smaller than 10000 bytes and remove the matching rows
                (column ``id``) from ``self.table`` in ``self.db``.
            save: load each remaining pickle, read its
                ``canonical_link`` and write an ``id``/``real`` table
                through ``pdx.save_to_csv`` to the path
                ``self.corpus``/``self.domain``.
    """
    min_size = 10000  # bytes; pickles below this are treated as too small

    def pickle_path(name):
        # os.path.join keeps this portable instead of hard-coding '\\'.
        return os.path.join(self.directory, name + '.pkl')

    # NOTE(review): assumes get_fnames returns names without extension,
    # since '.pkl' is appended below -- confirm against fx.
    files = fx.get_fnames(self.directory)
    if 'errors' in files:  # remove log file
        files.remove('errors')

    if 'clear_small' in kwargs:
        ids = [f for f in files if os.path.getsize(pickle_path(f)) < min_size]
        dbx.delete_rows(self.db, self.table, 'id', ids)
        if ids:
            fx.delete_files([pickle_path(f) for f in ids])
            # Drop the deleted ids so the 'save' pass below does not try
            # to load pickles that no longer exist.
            removed = set(ids)
            files = [f for f in files if f not in removed]

    # verify if links correspond to domain
    if 'save' in kwargs:
        data = []
        for doc_id in tqdm(files):
            doc = fx.load_pickle(pickle_path(doc_id))
            data.append((doc_id, doc['canonical_link']))
        df = pd.DataFrame(data, columns=['id', 'real'])
        pdx.save_to_csv(df, os.path.join(self.corpus, self.domain))
def teardown(self):
    """Run the parent teardown, then delete the files left by this run.

    Removes ``self.submit_code_path``, ``self.user_output_path`` and
    ``self.ref_output_path`` when they exist, then passes ``self.files``
    to ``delete_files`` if it is non-empty.
    """
    super(JavaCodeEvaluator, self).teardown()
    # Delete the created file. Guarded like the output paths below so a
    # missing file cannot raise and skip the remaining cleanup.
    if os.path.exists(self.submit_code_path):
        os.remove(self.submit_code_path)
    if os.path.exists(self.user_output_path):
        os.remove(self.user_output_path)
    if os.path.exists(self.ref_output_path):
        os.remove(self.ref_output_path)
    if self.files:
        delete_files(self.files)
def teardown(self):
    """Run the parent teardown, then delete the files left by this run.

    Removes ``self.submit_code_path``, ``self.ref_output_path`` and
    ``self.user_output_path`` when they exist, then passes ``self.files``
    to ``delete_files`` if it is non-empty.
    """
    super(CppCodeEvaluator, self).teardown()
    # Delete the created file. Guarded like the output paths below so a
    # missing file cannot raise and skip the remaining cleanup.
    if os.path.exists(self.submit_code_path):
        os.remove(self.submit_code_path)
    if os.path.exists(self.ref_output_path):
        os.remove(self.ref_output_path)
    if os.path.exists(self.user_output_path):
        os.remove(self.user_output_path)
    if self.files:
        delete_files(self.files)
def teardown(self):
    """Run the parent teardown, then delete the files left by this run.

    Removes ``self.submit_code_path`` when it exists, then passes
    ``self.files`` to ``delete_files`` if it is non-empty.
    """
    super(CppStdioEvaluator, self).teardown()
    # Guarded so a missing submission file cannot raise and prevent the
    # delete_files cleanup below from running.
    if os.path.exists(self.submit_code_path):
        os.remove(self.submit_code_path)
    if self.files:
        delete_files(self.files)
def teardown(self):
    """Run the parent teardown, then delete any files in ``self.files``."""
    super(PythonStdoutEvaluator, self).teardown()
    if not self.files:
        return
    # Delete the created file(s) recorded on this evaluator.
    delete_files(self.files)
def teardown(self):
    """Run the parent teardown, then delete the files left by this run.

    Removes ``self.submit_code_path`` when it exists, then passes
    ``self.files`` to ``delete_files`` if it is non-empty.
    """
    super(BashCodeEvaluator, self).teardown()
    # Delete the created file. Guarded so a missing file cannot raise and
    # prevent the delete_files cleanup below from running.
    if os.path.exists(self.submit_code_path):
        os.remove(self.submit_code_path)
    if self.files:
        delete_files(self.files)