def __init__(self, path, file_name):
    """Parse an XML data file and keep its root element on the instance.

    The file location is built by plain string concatenation of the
    application root, the ``system``/``data_path`` configuration value,
    ``path`` and ``file_name``. No separators are inserted, so every part
    must already carry whatever slash it needs.

    Args:
        path: Sub-path appended after the configured data path.
        file_name: Name of the XML file appended after ``path``.
    """
    settings = config.Config()
    configured_dir = settings.get_config('system', 'data_path')
    base_dir = approot.get_root() + configured_dir

    # Load the document
    location = base_dir + path + file_name
    parsed = xml.dom.minidom.parse(location)
    self.xml_doc = parsed.documentElement
def load_data():
    """Load the spam-detection CSV files and return them as one DataFrame.

    The five YouTube comment files are read first, in the order listed
    below, followed by the SMS spam collection. All files live in the
    ``spam_detection/`` folder under the configured external-data
    directory.

    Returns:
        The result of ``pd.concat`` over the frames, in read order. Row
        indices are left exactly as ``pd.concat`` produces them.
    """
    settings = config.Config()
    base = settings.data_dir + settings.external_data + 'spam_detection/'

    youtube_names = (
        'Youtube01-Psy.csv',
        'Youtube02-KatyPerry.csv',
        'Youtube03-LMFAO.csv',
        'Youtube04-Eminem.csv',
        'Youtube05-Shakira.csv',
    )
    frames = [pd.read_csv(base + name) for name in youtube_names]

    # The SMS collection is the only file read with engine='python'.
    frames.append(pd.read_csv(base + 'SMSSpamCollection.csv', engine='python'))

    return pd.concat(frames)
def __init__(self):
    """Prepare the shared run-wide constants for one test run.

    Reads output locations from the ``system`` configuration section,
    derives timestamped folder and file names from the current time, and
    publishes them through ``constants.set_value``. It also creates the
    Excel report folder under the application root if it does not exist
    yet, and stores the default driver under ``'my_driver'``.
    """
    # Initialise the configuration and the constants store
    settings = config.Config()
    constants._init()
    started_at = datetime.datetime.now()

    # Both stamps come from the same instant, so every folder and file
    # name produced below refers to the same run.
    day_stamp = started_at.strftime('%Y-%m-%d')
    clock_stamp = started_at.strftime('%H%M%S')

    # Log folder and file for this run
    log_root = settings.get_config('system', 'log_path')
    constants.set_value('log_folder', log_root + day_stamp)
    constants.set_value('log_file', clock_stamp)

    # Screenshot folder for this run
    shots_root = settings.get_config('system', 'screenshot_path')
    constants.set_value(
        'screenshot_folder',
        shots_root + started_at.strftime('%Y-%m-%d_%H%M%S'))

    # Excel report folder and file for this run
    excel_root = settings.get_config('system', 'excel_report_path')
    excel_relative_dir = excel_root + day_stamp
    constants.set_value('excel_report_folder', excel_relative_dir)
    constants.set_value('excel_report_file', clock_stamp)

    # Location where HTML reports are stored
    constants.set_value(
        'html_report_path',
        settings.get_config('system', 'html_report_path'))

    # Create the Excel report folder. The constant above holds the
    # configured (unprefixed) value; the directory is created under the
    # application root.
    excel_absolute_dir = approot.get_root() + excel_relative_dir
    if not os.path.exists(excel_absolute_dir):
        os.makedirs(excel_absolute_dir)

    # Set up the driver
    driver_factory = driver.Driver()
    default_driver_name = settings.get_config('driver', 'default')
    constants.set_value(
        'my_driver', driver_factory.get_driver(default_driver_name))
except Exception: return 'no_language_detected' def remove_ponctuation(text): if text is not None and isinstance(text, str): return text.translate(str.maketrans('', '', string.punctuation)).lower() else: None if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) c = config.Config() data = pd.read_csv(c.data_dir + c.raw_data + c.comments_file, engine='python') # Drop duplicates data.drop_duplicates(subset='cid', inplace=True, keep='last') # Description cleanning data['text'] = data['text'].apply(remove_ponctuation) logging.info('Punctuation removed!') # description classifier data['text_level'] = data['text'].apply(length_description).apply( description_classifier)