def __init__(self): """ Initalizes DataProcessing class with utilities and parallel processing """ self.res = Res() self.pool = Pool()
def __init__(self): """ Initalize resources, preprcess and feature engineering classes """ self.res = Res() self.preprocess = Pipe() self.feature = Vec() self.data = Data()
# Res (resource utilities) and Pool (worker pool) are provided by the project's own modules.
import re

import pandas as pd


class Pipe():
    def __init__(self):
        """
        Initializes the DataProcessing class with utilities and parallel processing.

        Params:
            None
        Returns:
            None
        """
        self.res = Res()
        self.pool = Pool()

    def getDescription(self, description):
        """
        Returns a list of cleaned descriptions from a dataframe list column.

        Params:
            description: list of work order descriptions
        Returns:
            description: list of cleaned descriptions
        """
        # Lower-case each description and strip everything except letters
        description = [
            re.sub(r"[^a-zA-Z]", " ", description[i].lower())
            for i in range(len(description))
        ]
        # Clean the descriptions in parallel using the worker pool
        return list(self.pool.map(self.res.clean_text, description))

    def createDataframe(self, description):
        """
        Creates a dataframe of cleaned descriptions.

        Params:
            description: list of cleaned descriptions
        Returns:
            dataframe with a single "descriptions" column
        """
        return pd.DataFrame({"descriptions": description})

    def ProcessData(self, column_names=["Description_Document"]):
        """
        Runs the DataProcessing class.

        Params:
            column_names: columns to load from the dataset
        Returns:
            dataframe of cleaned descriptions
        """
        dataframe = self.res.loadData(r"./Data", column_names)
        description = self.getDescription(
            list(dataframe["Description_Document"]))
        return self.createDataframe(description)
class Pipe():
    def __init__(self):
        """
        Initializes the DataProcessing class with utilities and parallel processing.
        """
        self.res = Res()
        self.pool = Pool()

    def getDescription(self, description):
        """
        Returns a list of cleaned descriptions from a dataframe list column.

        Params:
            description: list of work order descriptions
        Returns:
            description: list of cleaned descriptions
        """
        description = [
            re.sub(r"[^a-zA-Z]", " ", description[i].lower())
            for i in range(len(description))
        ]
        return list(self.pool.map(self.res.clean_text, description))

    def createDataframe(self, description):
        """
        Creates a dataframe of cleaned descriptions.
        """
        return pd.DataFrame({"descriptions": description})

    def ProcessData(self, column_names=["Description_Document"]):
        """
        Runs the DataProcessing class.
        """
        # Look in the data directory for the dataset
        dataframe = self.res.loadData(r'./Final/Data', column_names)
        description = self.getDescription(
            list(dataframe["Description_Document"]))
        return self.createDataframe(description)
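# A minimal usage sketch for the pipeline above (assumption: Res.loadData returns a
# pandas DataFrame with a "Description_Document" column and Res.clean_text returns a
# cleaned string; this driver is illustrative, not part of the original project).
if __name__ == "__main__":
    pipe = Pipe()
    # Clean every work order description and collect the results into a dataframe
    cleaned = pipe.ProcessData(column_names=["Description_Document"])
    print(cleaned.head())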
def __init__(self):
    """
    Initializes the resource, vectorizer, and pipeline helpers.
    """
    self.res = Res()
    self.vector = Vec()
    self.pipe = Pipe()
abspath = cfg.settings['ABS_PATH']
tag_input_file = cfg.settings['input_file']
resources_output_file = cfg.settings['output_file1']
tags_output_file = cfg.settings['output_file2']
fieldnames_tag_input = cfg.settings['input_file_fields']
fieldnames_resource_output = cfg.settings['resource_output_fields']
fieldnames_tag_output = cfg.settings['tag_output_fields']

if (args.service == 'ec2'):
    # Get a session
    conn = Connection(region_name, profile_name, "session")
    # Get the service client
    service = conn.get_aws_service('ec2')
    # Get resource access
    res = Res(service)

    # If the script runs without any tag argument, list all instances with no tags
    if (args.tag is None and args.value is None and args.file is None):
        res_list = res.get_ec2_resources_on_taginfo(taginfo="notag")
        print(res_list)

    # If the script runs with only a tag key and no value
    if (args.tag and args.value is None):
        res_list = res.get_ec2_resources_on_taginfo(tagonly=args.tag)
        print(res_list)

    # If the script runs with both a tag key and a value
    if (args.tag and args.value):
        res_list = res.get_ec2_resources_on_taginfo(key=args.tag,
                                                    value=args.value)  # value kwarg assumed; the source cuts off mid-call
        print(res_list)
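# The snippet above reads args.service, args.tag, args.value, and args.file; a minimal
# argparse sketch that would supply them is shown below (the parser, flag names, and
# help text are assumptions, not taken from the source).
import argparse

parser = argparse.ArgumentParser(description="List AWS EC2 resources by tag information")
parser.add_argument("--service", required=True, help="AWS service to query, e.g. ec2")
parser.add_argument("--tag", default=None, help="tag key to filter on")
parser.add_argument("--value", default=None, help="tag value to filter on")
parser.add_argument("--file", default=None, help="CSV file of tag data")
args = parser.parse_args()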