Esempio n. 1
0
 def __init__(self):
     """
         Initializes the object with a Res helper and a Pool instance.

         NOTE(review): Res and Pool are defined outside this snippet. Pool is
         presumably a multiprocessing-style worker pool -- confirm against the
         file's imports.
     """
     # Utility helper; its API is not visible in this snippet.
     self.res = Res()
     # Pool instance kept on the object for later use.
     self.pool = Pool()
Esempio n. 2
0
 def __init__(self):

     """
     Initialize the resource, preprocessing, feature and data helper objects.

     NOTE(review): Res, Pipe, Vec and Data are defined outside this snippet;
     the role descriptions below follow the attribute names and should be
     confirmed against those classes.
     """
     # Shared resources / utilities helper.
     self.res = Res()
     # Preprocessing pipeline.
     self.preprocess = Pipe()
     # Feature-engineering helper (presumably a vectorizer -- TODO confirm).
     self.feature = Vec()
     # Data helper; purpose not visible here.
     self.data = Data()
Esempio n. 3
0
class Pipe:
    """Clean raw description text and package it as a pandas DataFrame."""

    def __init__(self):
        """
            Create the helpers the pipeline relies on.
            Paras:
                None
            Returns:
                None
        """
        # Res supplies loadData() and clean_text(); Pool supplies map().
        self.res = Res()
        self.pool = Pool()

    def getDescription(self, description):
        """
            Return the cleaned form of every description.

            Each entry is lower-cased and every character that is not an
            ASCII letter is replaced by a space; the normalised strings are
            then passed through self.res.clean_text via self.pool.map.
            Paras:
                description: list of description strings
            Returns:
                list built from the results of self.pool.map
        """
        # Compile once instead of looking the pattern up for every entry.
        non_letters = re.compile(r"[^a-zA-Z]")
        normalised = [non_letters.sub(" ", text.lower()) for text in description]
        return list(self.pool.map(self.res.clean_text, normalised))

    def createDataframe(self, description):
        """
            Wrap the cleaned descriptions in a DataFrame.
            Paras:
                description: list of cleaned descriptions
            Returns:
                pandas DataFrame with a single "descriptions" column
        """
        return pd.DataFrame({"descriptions": description})

    def ProcessData(self, column_names=None):
        """
            Load the dataset from ./Data, clean it and return it as a DataFrame.

            The text is always read from the "Description_Document" column of
            the loaded frame, so column_names has to include that column.
            Paras:
                column_names: list of columns handed to self.res.loadData;
                    defaults to ["Description_Document"]
            Returns:
                pandas DataFrame with a single "descriptions" column
        """
        # A None sentinel gives every call its own list; a list literal in
        # the signature would be shared (and mutable) across calls.
        if column_names is None:
            column_names = ["Description_Document"]

        dataframe = self.res.loadData(r"./Data", column_names)
        description = self.getDescription(
            list(dataframe["Description_Document"]))
        return self.createDataframe(description)
Esempio n. 4
0
class Pipe:
    """Clean raw description text and package it as a pandas DataFrame."""

    def __init__(self):
        """
            Create the helpers the pipeline relies on.

        """
        # Res supplies loadData() and clean_text(); Pool supplies map().
        self.res = Res()
        self.pool = Pool()

    def getDescription(self, description):
        """
            Return the cleaned form of every description.

            Each entry is lower-cased and every character that is not an
            ASCII letter is replaced by a space; the normalised strings are
            then passed through self.res.clean_text via self.pool.map.
            Paras:
                description: list of description strings
            Returns:
                list built from the results of self.pool.map
        """
        # Compile once instead of looking the pattern up for every entry.
        non_letters = re.compile(r"[^a-zA-Z]")
        normalised = [non_letters.sub(" ", text.lower()) for text in description]
        return list(self.pool.map(self.res.clean_text, normalised))

    def createDataframe(self, description):
        """
            Wrap the cleaned descriptions in a DataFrame.
            Paras:
                description: list of cleaned descriptions
            Returns:
                pandas DataFrame with a single "descriptions" column
        """
        return pd.DataFrame({"descriptions": description})

    def ProcessData(self, column_names=None):
        """
            Load the dataset from ./Final/Data, clean it and return a DataFrame.

            The text is always read from the "Description_Document" column of
            the loaded frame, so column_names has to include that column.
            Paras:
                column_names: list of columns handed to self.res.loadData;
                    defaults to ["Description_Document"]
            Returns:
                pandas DataFrame with a single "descriptions" column
        """
        # A None sentinel gives every call its own list; a list literal in
        # the signature would be shared (and mutable) across calls.
        if column_names is None:
            column_names = ["Description_Document"]

        # Look in directory for dataset
        dataframe = self.res.loadData(r'./Final/Data', column_names)
        description = self.getDescription(
            list(dataframe["Description_Document"]))
        return self.createDataframe(description)
Esempio n. 5
0
    def __init__(self):
        """
        Instantiate the Res, Vec and Pipe collaborators used by this object.

        NOTE(review): all three classes are defined outside this snippet;
        confirm their roles against their definitions.
        """

        self.res = Res()
        self.vector = Vec()
        self.pipe = Pipe()
Esempio n. 6
0
        abspath = cfg.settings['ABS_PATH']
        tag_input_file = cfg.settings['input_file']
        resources_output_file = cfg.settings['output_file1']
        tags_output_file = cfg.settings['output_file2']
        fieldnames_tag_input = cfg.settings['input_file_fields']
        fieldnames_resource_output = cfg.settings['resource_output_fields']
        fieldnames_tag_output = cfg.settings['tag_output_fields']

        if (args.service == 'ec2'):
            # get session
            conn = Connection(region_name, profile_name, "session")
            # get service
            service = conn.get_aws_service('ec2')

            # get resource access
            res = Res(service)

            # if the script runs without any argument, it means get all the instances with notags

            if (args.tag == None and args.value == None and args.file == None):
                res_list = res.get_ec2_resources_on_taginfo(taginfo="notag")
                print(res_list)
        # if the script runs with only tag key and no value
            if (args.tag and args.value == None):
                #print("inside args tag",args.tag)
                res_list = res.get_ec2_resources_on_taginfo(tagonly=args.tag)
                print(res_list)
        # if the script runs with both tag key and  value
            if (args.tag and args.value):
                #print("inside args tag value",args.tag,args.value)
                res_list = res.get_ec2_resources_on_taginfo(key=args.tag,