def empty_dir(dir_path):
    """
    Delete everything inside a directory while keeping the directory itself.

    Files directly under ``dir_path`` are unlinked and sub-directories are
    removed recursively.

    :param dir_path: directory absolute path
    :return: directory absolute path (the value that was passed in)
    :raises Exception: if ``dir_path`` is not a valid path or a deletion
        fails; the message contains the formatted stack trace.
    """
    try:
        if not ResourceManager.is_path_exist(dir_path):
            msg = "%s directory path is not valid." % dir_path
            raise Exception(msg)

        for root, dirs, files in walk(dir_path):
            for f in files:
                log.info("deleting file %s : " % f)
                unlink(path.join(root, f))
                log.info("%s file deleted : " % f)
            for d in dirs:
                target = path.join(root, d)
                log.info("deleting directory : %s: " % d)
                if path.islink(target):
                    # walk() lists symlinks to directories among `dirs`, but
                    # rmtree() refuses to operate on a symlink: drop the link
                    # itself and leave its target untouched.
                    unlink(target)
                else:
                    shutil.rmtree(target)
                log.info("%s directory deleted : " % d)
            # rmtree() already removed every sub-tree, so only the first
            # (top-level) walk() entry has anything left to process.
            break

        log.info("%s directory path returned : " % dir_path)
        return dir_path
    except Exception:
        trace_err = StackTrace.get_stack_trace()
        msg = "ResourceManager (empty_dir()) : %s%s" % ("\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def clean_product_names(text, product_names):
    """
    Replace every product name found in the text by its normalized form.

    Ex : xen server => xenserver (the mapping decides the replacement).

    :param text: text as str
    :param product_names: mapping of product name -> replacement string
    :return: modified text as str; a single space when the stripped text
        is empty
    :raises Exception: on any failure; the message contains the formatted
        stack trace.
    """
    try:
        text = text.strip()
        if not text:
            return " "
        # items() is available on both Python 2 and Python 3 mappings,
        # whereas iteritems() only exists on Python 2 dicts.
        for key, val in product_names.items():
            # replace() is a no-op when the key is absent, so no separate
            # membership test is needed.
            text = text.replace(key, val)
        return text
    except Exception:
        trace_err = StackTrace.get_stack_trace()
        msg = "CleanTextProcessor (clean_product_names()) : %s%s" % (
            "\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def read_product_name_replace_file(csv_file_abs_path):
    """
    Read the product name replace csv file into a key/value mapping.

    The first two columns of every data row are stripped and lower-cased;
    the first becomes the key and the second the value.

    :param csv_file_abs_path: absolute path of the csv file
    :return: dictionary of key -> value
    :raises Exception: if the file does not exist, if a row has an empty
        element in one of the two columns, or on any other failure; the
        message contains the formatted stack trace.
    """
    try:
        if not ResourceManager.is_path_exist(csv_file_abs_path):
            raise Exception("%s does not exists in resource directory." %
                            csv_file_abs_path)

        result = {}
        log.info("%s exists. Reading csv file." % csv_file_abs_path)
        # Read every cell as text and disable NA detection: otherwise an
        # empty cell arrives as a float NaN (and a numeric cell as a number),
        # .strip() fails on it and the explicit "empty element" error below
        # can never be reported.
        df = read_csv(csv_file_abs_path, dtype=str, na_filter=False)
        for row in df.itertuples():
            # row[0] is the frame index; the data columns start at row[1].
            key = row[1].strip().lower()
            val = row[2].strip().lower()
            if not (key and val):
                raise Exception(
                    "%s file has one column element empty. Please verify the csv file. "
                    "The two column should have some value not empty." %
                    csv_file_abs_path)
            result[key] = val
        return result
    except Exception:
        trace_err = StackTrace.get_stack_trace()
        msg = "ResourceManager (read_product_name_replace_file()) : %s%s" % (
            "\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def clean_text(text):
    """
    Split the text into sentences and normalize each of them.

    Every sentence produced by ``tokenize`` is split on whitespace, tokens
    found in ``punctuation`` are dropped, the remaining tokens are stemmed,
    the result is passed through ``clean_not_words`` and a trailing "." is
    appended.

    :param text: text as str
    :return: list of sentences (empty list when the stripped text is empty)
    """
    try:
        stripped = text.strip()
        if not stripped:
            return []

        sentences = []
        for raw_sentence in tokenize(stripped):
            stems = []
            for token in raw_sentence.split():
                if token not in punctuation:
                    stems.append(porter_stemmer.stem(token))
            normalized = CleanTextProcessor.clean_not_words(" ".join(stems))
            sentences.append(normalized + ".")
        return sentences
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "CleanTextProcessor (clean_text()) : %s%s" % ("\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def __create_namedtuple(self, name, val):
    """
    Build a namedtuple instance from parallel lists of names and values.

    :param name: list of field names
    :param val: list of values, in the same order as ``name``
    :return: instance of a namedtuple type called 'obj'
    """
    try:
        field_spec = ",".join(name)
        record_type = namedtuple('obj', field_spec)
        record = record_type._make(val)
        return record
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "Config (__create_namedtuple()) : %s%s" % ("\n", trace_err)
        raise Exception(msg)
def clean_not_words(text):
    """
    Replace every "n't" occurrence in the stripped text by "not".

    :param text: text as str
    :return: modified text as str; a single space when the stripped text
        is empty
    """
    try:
        stripped = text.strip()
        return stripped.replace("n't", "not") if stripped else " "
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "CleanTextProcessor (clean_not_words()) : %s%s" % ("\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def clean_escape_codes(text):
    """
    Replace newline, tab, carriage return, bell and backspace characters
    by a space and strip the result.

    :param text: text as str
    :return: modified text as str; a single space when the stripped text
        is empty
    """
    try:
        stripped = text.strip()
        if not stripped:
            return " "

        cleaned = str(stripped)
        for escape_char in ("\n", "\t", "\r", "\a", "\b"):
            cleaned = cleaned.replace(escape_char, " ")
        return cleaned.strip()
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "CleanTextProcessor (clean_escape_codes()) : %s%s" % (
            "\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def __read_file(self, file_path):
    """
    Parse a ``key = value`` config file into parallel name/value lists.

    Blank lines and lines starting with "#" are skipped. Keys are stripped
    and lower-cased. Values are stripped and converted as follows:
    "true"/"false" (any case) -> bool, optionally signed digits -> int,
    empty or "n/a" (any case) -> None, anything else stays a string. For the
    directory keys listed in the body the directory is created through
    ``ResourceManager.create_dir``.

    :param file_path: absolute path of the file.
    :return: entity name and value as list inside config file.
    :raises Exception: if the file cannot be read or a line is malformed;
        the message contains the formatted stack trace.
    """
    try:
        val = []
        name = []
        directory_keys = [
            "gcs_credential_directory_path", "local_upload_path",
            "local_download_path", "local_log_path"
        ]
        with open(file_path, 'r') as f:
            for l in f:
                l = l.strip()
                if not l or l.startswith("#"):
                    continue
                # Split on the first "=" only, so that values which contain
                # "=" themselves (urls, encoded tokens, ...) are kept whole.
                line = l.split("=", 1)
                if len(line) != 2:
                    raise ValueError(
                        "config line without '=' separator: %s" % l)
                left_side_val = line[0].strip().lower()
                right_side_val = line[1].strip()
                lowered_val = right_side_val.lower()
                name.append(left_side_val)
                if lowered_val == 'true':
                    val.append(True)
                elif lowered_val == 'false':
                    val.append(False)
                elif right_side_val.lstrip("+-").isdigit():
                    val.append(int(right_side_val))
                elif not lowered_val or lowered_val == "n/a":
                    val.append(None)
                else:
                    if left_side_val in directory_keys:
                        ResourceManager.create_dir(right_side_val)
                    val.append(right_side_val)
        # The with-statement closes the file; no explicit close() is needed.
        return name, val
    except Exception:
        trace_err = StackTrace.get_stack_trace()
        msg = "Config (__read_file()) : %s%s" % ("\n", trace_err)
        raise Exception(msg)
def create_dir(dir_path):
    """
    Create the directory (including missing parents) when
    ``ResourceManager.is_path_exist`` reports that it is absent.

    :param dir_path: path of the directory
    :return: the ``dir_path`` that was passed in
    """
    try:
        already_present = ResourceManager.is_path_exist(dir_path)
        if already_present:
            return dir_path

        log.info("creating %s directory" % dir_path)
        makedirs(dir_path)
        log.info("%s : directory created" % dir_path)
        return dir_path
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "ResourceManager (create_dir()) : %s%s" % ("\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def clean_stop_words(text):
    """
    Drop every whitespace-separated word that is found in ``stop_words``.

    :param text: text as str
    :return: remaining words joined by single spaces; a single space when
        the stripped text is empty
    """
    try:
        stripped = text.strip()
        if not stripped:
            return " "

        kept = []
        for token in stripped.split():
            if token in stop_words:
                continue
            kept.append(token)
        return " ".join(kept)
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "CleanTextProcessor (clean_stop_words()) : %s%s" % (
            "\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def clean_codecs(text):
    """
    Remove every character that is not part of ``string.printable``.

    :param text: text as str
    :return: text restricted to printable characters; a single space when
        the stripped text is empty
    :raises Exception: on any failure; the message contains the formatted
        stack trace.
    """
    try:
        text = text.strip()
        if not text:
            return " "
        # join() yields a string on both Python 2 and Python 3, whereas
        # filter() returns a lazy iterator on Python 3. The former
        # isinstance(text, (int, long)) guard was unreachable for numbers
        # (strip() above already requires a string) and is dropped.
        return "".join(ch for ch in text if ch in string.printable)
    except Exception:
        trace_err = StackTrace.get_stack_trace()
        msg = "CleanTextProcessor (clean_codecs()) : %s%s" % ("\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def clean_dictionary_words(text):
    """
    Keep only the words for which ``dic_en.check`` returns a falsy value.

    :param text: text as str
    :return: kept words joined by single spaces; a single space when the
        stripped text is empty
    """
    try:
        stripped = text.strip()
        if not stripped:
            return " "

        candidates = (piece.strip() for piece in stripped.split())
        kept = [word for word in candidates if not dic_en.check(word)]
        return " ".join(kept)
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "CleanTextProcessor (clean_dictionary_words()) : %s%s" % (
            "\n", trace_err)
        log.error(msg)
        raise Exception(msg)
def __setup(self, project_env):
    """
    Load the config of one environment into a namedtuple.

    The config file is looked up at
    ``<base_dir>/configuration/<project_env>_setup/config``.

    :param project_env: environment type (prod, test, local)
    :return: namedtuple built from the names and values read from the file
    """
    try:
        env_folder = "%s%s" % (project_env, "_setup")
        config_file_path = sep.join(
            [self.base_dir, "configuration", env_folder, "config"])

        if not ResourceManager.is_path_exist(config_file_path):
            msg = project_env + " : config file path is not a valid path."
            raise Exception(msg)

        names, values = self.__read_file(config_file_path)
        return self.__create_namedtuple(names, values)
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "Config (__setup()) : %s%s" % ("\n", trace_err)
        raise Exception(msg)
def read_file(file_abs_path):
    """
    Read a text file and collect its distinct non-blank lines.

    Each line is stripped and lower-cased before being added to the result.

    :param file_abs_path: path of the file
    :return: unique lines as set.
    """
    try:
        if not ResourceManager.is_path_exist(file_abs_path):
            raise Exception("%s does not exists" % file_abs_path)

        log.info("%s exists. Reading file." % file_abs_path)
        with open(file_abs_path, "r") as f:
            content = f.read()
        stripped_lines = (chunk.strip() for chunk in content.split("\n"))
        return set(entry.lower() for entry in stripped_lines if entry)
    except:
        trace_err = StackTrace.get_stack_trace()
        msg = "ResourceManager (read_file()) : %s%s" % ("\n", trace_err)
        log.error(msg)
        raise Exception(msg)