def execute_connection(self):
    """Strip records dated before a cutoff from a CSV file.

    Required parameters (from ``self.param_dict``):
        strip_file: file to strip; assumed to live in the working directory.
        date_field: name of the date column; common formats accepted,
            recommended MM/DD/YYYY (e.g. 01/01/2015).

    Optional parameters:
        strip_criteria: "<units>,<unit>" where unit is Day/Month/Year,
            e.g. "6,Month" (default "1,Month").
        use_last_processed: "true" to anchor the cutoff at the entry's
            last_processed date instead of the run date.
        strip_date: explicit anchor date, e.g. 01/01/2015.

    Records strictly before the computed match date are written to a temp
    CSV (saved under the connector name). Returns 0 on completion.
    """
    filename = self.param_dict['strip_file']
    date_field = self.param_dict['date_field']
    units, date_unit = self.param_dict.get('strip_criteria', '1,Month').split(",")
    # BUG FIX: the original compared the *method object* `.upper` to "TRUE",
    # which is always False, so use_last_processed was silently ignored.
    # Call .upper() and use the boolean expression directly.
    use_last = self.param_dict.get("use_last_processed", "false").upper() == "TRUE"
    if use_last:
        strip_date = self.entry.last_processed
    else:
        strip_date = RawDataUtilities.date_from_string(
            self.param_dict.get(
                "strip_date",
                RawDataUtilities.string_from_date(self.entry.today_dt)))
    match_date = self.get_match_date(units, date_unit, strip_date)
    file_in = os.path.join(self.entry.working_directory, filename)
    with open(file_in) as csvfile:
        reader = csv.DictReader(csvfile)
        self.setup_csv_temp_writer(self.get_temp_csv_name(), reader.fieldnames)
        for row in reader:
            compare_date = RawDataUtilities.date_from_string(row[date_field])
            # get_diff subtracts the file's date from the match date:
            # e.g. cutoff 10/1/2014 vs record 09/1/2014 -> diff < 0,
            # so the record is older than the cutoff and gets stripped.
            diff = RawDataUtilities.get_diff(match_date, compare_date)
            if diff < 0:
                self.write_temp_rec(row)
    self.close_temp_csv()
    return 0
def execute_connection(self):
    """Strip records dated before a cutoff from a CSV file.

    Required parameters (from ``self.param_dict``):
        strip_file: file to strip; assumed to live in the working directory.
        date_field: name of the date column; common formats accepted,
            recommended MM/DD/YYYY (e.g. 01/01/2015).

    Optional parameters:
        strip_criteria: "<units>,<unit>" where unit is Day/Month/Year,
            e.g. "6,Month" (default "1,Month").
        use_last_processed: "true" to anchor the cutoff at the entry's
            last_processed date instead of the run date.
        strip_date: explicit anchor date, e.g. 01/01/2015.

    Records strictly before the computed match date are written to a temp
    CSV (saved under the connector name). Returns 0 on completion.
    """
    filename = self.param_dict["strip_file"]
    date_field = self.param_dict["date_field"]
    units, date_unit = self.param_dict.get("strip_criteria", "1,Month").split(",")
    # BUG FIX: the original compared the *method object* `.upper` to "TRUE",
    # which is always False, so use_last_processed was silently ignored.
    # Call .upper() and use the boolean expression directly.
    use_last = self.param_dict.get("use_last_processed", "false").upper() == "TRUE"
    strip_date = (
        self.entry.last_processed
        if use_last
        else RawDataUtilities.date_from_string(
            self.param_dict.get(
                "strip_date",
                RawDataUtilities.string_from_date(self.entry.today_dt))
        )
    )
    match_date = self.get_match_date(units, date_unit, strip_date)
    file_in = os.path.join(self.entry.working_directory, filename)
    with open(file_in) as csvfile:
        reader = csv.DictReader(csvfile)
        self.setup_csv_temp_writer(self.get_temp_csv_name(), reader.fieldnames)
        for row in reader:
            compare_date = RawDataUtilities.date_from_string(row[date_field])
            # get_diff subtracts the file's date from the match date:
            # e.g. cutoff 10/1/2014 vs record 09/1/2014 -> diff < 0,
            # so the record is older than the cutoff and gets stripped.
            diff = RawDataUtilities.get_diff(match_date, compare_date)
            if diff < 0:
                self.write_temp_rec(row)
    self.close_temp_csv()
    return 0
def get_min_max(self, csv_in, date_field):
    """Scan ``csv_in`` and return ``(max_date, min_date, header)``.

    ``header`` is the file's column list extended with the two synthetic
    columns ``max_<date_field>`` and ``min_<date_field>``.  Returns
    ``(None, None, [])`` for a file with no data rows.
    """
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    max_d = None
    min_d = None
    header = []
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if not header:
                # BUG FIX: on Python 3, row.keys() is a dict view with no
                # .append(); materialize it as a list first (identical
                # behavior on Python 2, where keys() already copied).
                header = list(row.keys())
                header.append(max_field)
                header.append(min_field)
            rec_date = RawDataUtilities.date_from_string(row[date_field])
            if not max_d:
                max_d = rec_date
            # NOTE(review): the 1-minute tolerance means a record within a
            # minute *below* the current max also replaces it; preserved
            # as-is -- confirm whether a strict `rec_date > max_d` was meant.
            if max_d - rec_date < datetime.timedelta(minutes=1):
                max_d = rec_date
            if not min_d:
                min_d = rec_date
            if rec_date - min_d < datetime.timedelta(minutes=1):
                min_d = rec_date
    return max_d, min_d, header
def get_min_max(self, csv_in, date_field):
    """Scan ``csv_in`` and return ``(max_date, min_date, header)``.

    ``header`` is the file's column list extended with the two synthetic
    columns ``max_<date_field>`` and ``min_<date_field>``.  Returns
    ``(None, None, [])`` for a file with no data rows.
    """
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    max_d = None
    min_d = None
    header = []
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if not header:
                # BUG FIX: on Python 3, row.keys() is a dict view with no
                # .append(); materialize it as a list first (identical
                # behavior on Python 2, where keys() already copied).
                header = list(row.keys())
                header.append(max_field)
                header.append(min_field)
            rec_date = RawDataUtilities.date_from_string(row[date_field])
            if not max_d:
                max_d = rec_date
            # NOTE(review): the 1-minute tolerance means a record within a
            # minute *below* the current max also replaces it; preserved
            # as-is -- confirm whether a strict `rec_date > max_d` was meant.
            if max_d - rec_date < datetime.timedelta(minutes=1):
                max_d = rec_date
            if not min_d:
                min_d = rec_date
            if rec_date - min_d < datetime.timedelta(minutes=1):
                min_d = rec_date
    return max_d, min_d, header
def execute_processor(self):
    """Copy ``in_file`` to a temp CSV, stamping every record with the
    min and max values of ``date_field`` across the whole file.

    Required parameters (from ``self.param_dict``): ``in_file`` (found in
    the entry's working directory) and ``date_field``.  Returns 0.
    """
    filename = self.param_dict['in_file']
    date_field = self.param_dict['date_field']
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    csv_in = os.path.join(self.entry.working_directory, filename)
    # First pass: compute the extreme dates and the output header.
    # (The original re-scanned the file here with a second, identical
    # min/max loop that could only reproduce the same results and -- on
    # Python 3 -- crashed on row.keys().append(); that redundant pass
    # has been removed.)
    max_d, min_d, header = self.get_min_max(csv_in, date_field)
    self.setup_csv_temp_writer(self.get_temp_csv_name(), header, write_header=True)
    # Second pass: copy each record with the computed extremes attached.
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            out_dict = {max_field: max_d, min_field: min_d}
            # row's own keys are disjoint from the synthetic columns, so
            # update() preserves the original record untouched.
            out_dict.update(row)
            self.write_temp_rec(out_dict)
    self.close_temp_csv()
    return 0
def execute_processor(self):
    """Copy ``in_file`` to a temp CSV, stamping every record with the
    min and max values of ``date_field`` across the whole file.

    Required parameters (from ``self.param_dict``): ``in_file`` (found in
    the entry's working directory) and ``date_field``.  Returns 0.
    """
    filename = self.param_dict['in_file']
    date_field = self.param_dict['date_field']
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    csv_in = os.path.join(self.entry.working_directory, filename)
    # First pass: compute the extreme dates and the output header.
    # (The original re-scanned the file here with a second, identical
    # min/max loop that could only reproduce the same results and -- on
    # Python 3 -- crashed on row.keys().append(); that redundant pass
    # has been removed.)
    max_d, min_d, header = self.get_min_max(csv_in, date_field)
    self.setup_csv_temp_writer(self.get_temp_csv_name(), header, write_header=True)
    # Second pass: copy each record with the computed extremes attached.
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            out_dict = {max_field: max_d, min_field: min_d}
            # row's own keys are disjoint from the synthetic columns, so
            # update() preserves the original record untouched.
            out_dict.update(row)
            self.write_temp_rec(out_dict)
    self.close_temp_csv()
    return 0
def __init__(self, params):
    """Initialize the reader from a parameter dict.

    Recognized keys: ``run_date`` (parsed to a date when present) and
    ``entry_name`` (defaults to None).  Also prepares the two entry
    buckets and emits the startup log record.
    """
    Base_Type.__init__(self)
    if 'run_date' in params:
        self.run_date = RawDataUtilities.date_from_string(params['run_date'])
    self.entry_name = params.get('entry_name')
    self.total_time = 0.0
    # two empty buckets for entries
    for slot in (0, 1):
        self.entries[slot] = []
    #self.set_uuid(params.get('uuid','---none---'))
    self.log_message("Initialization Complete",
                     log_type='main',
                     status='running',
                     step='load configs',
                     name='config_file_reader',
                     log_level=self.log_info())
def load_filters(self):
    """Build ``self.crit_dict`` from filter-style parameters.

    A parameter named ``<criterion>_<field>`` -- where criterion is one of
    equal/gt/lt/gte/lte/in -- becomes an entry mapping the parameter name
    to its field, criterion, parsed value, and comparison function.
    Fields ending in ``_dt`` have the suffix stripped and their value
    parsed as a date; ``in`` values are comma-split lists with each
    member run through ``self.convert``.
    """
    crit_list = {
        'equal': self._equal,
        'gt': self._gt,
        'lt': self._lt,
        'gte': self._gte,
        'lte': self._lte,
        'in': self._in,
    }
    self.crit_dict = {}
    for param in self.param_dict.keys():
        # the text before the first underscore names the criterion
        criterion = param[:param.find('_')]
        if criterion not in crit_list:
            continue
        field = param[len(criterion) + 1:]
        dt = field.endswith("_dt")
        if dt:
            field = field[:-3]
        value = self.param_dict[param]
        if dt:
            value = RawDataUtilities.date_from_string(value)
        elif criterion == 'in':
            # FIX: the original called self.convert(m) twice per member;
            # `convert(m) or m` has identical truthiness semantics with
            # a single call.
            value = [self.convert(m) or m for m in value.split(",")]
        else:
            value = self.convert(value)
        self.crit_dict[param] = {
            'field': field,
            'criterion': criterion,
            'value': value,
            'function': crit_list[criterion],
        }
def load_filters(self):
    """Build ``self.crit_dict`` from filter-style parameters.

    A parameter named ``<criterion>_<field>`` -- where criterion is one of
    equal/gt/lt/gte/lte/in -- becomes an entry mapping the parameter name
    to its field, criterion, parsed value, and comparison function.
    Fields ending in ``_dt`` have the suffix stripped and their value
    parsed as a date; ``in`` values are comma-split lists with each
    member run through ``self.convert``.
    """
    crit_list = {
        'equal': self._equal,
        'gt': self._gt,
        'lt': self._lt,
        'gte': self._gte,
        'lte': self._lte,
        'in': self._in,
    }
    self.crit_dict = {}
    for param in self.param_dict.keys():
        # the text before the first underscore names the criterion
        criterion = param[:param.find('_')]
        if criterion not in crit_list:
            continue
        field = param[len(criterion) + 1:]
        dt = field.endswith("_dt")
        if dt:
            field = field[:-3]
        value = self.param_dict[param]
        if dt:
            value = RawDataUtilities.date_from_string(value)
        elif criterion == 'in':
            # FIX: the original called self.convert(m) twice per member;
            # `convert(m) or m` has identical truthiness semantics with
            # a single call.
            value = [self.convert(m) or m for m in value.split(",")]
        else:
            value = self.convert(value)
        self.crit_dict[param] = {
            'field': field,
            'criterion': criterion,
            'value': value,
            'function': crit_list[criterion],
        }
def __init__(self, params):
    """Initialize one config entry from its parameter dict.

    Required keys: run_date, name, source_file, description,
    src_implementation, working_directory.  All other keys are optional
    with the defaults noted inline.  When instantiation is enabled, the
    underlying implementation is loaded and a success/failure log record
    is emitted; failure clears ``self.ready``.
    """
    Base_Type.__init__(self)
    ## class variables
    self.updates = {}
    self.return_val = None
    self.description = None
    self.connection_type = None
    self.last_processed = None
    self.num_run = 0
    self.message = None
    self.no_run_reason = 'Schedule Not Ready'
    ### valid options... Default is Append
    # Append - Add records to the end
    # Overlay - replace the current with the new
    # New - Add a new file with the run number on the end; most current result has the name with no number
    self.file_write = None
    ## Valid Options... Default is Temp CSV file
    # Dataframe
    # ListList
    # DictionaryList
    # TempCSV
    # None
    self.temp_results = None
    self.out_filename = None
    self.src_implementation = None
    self.run_frequency = None
    ## location where results are written
    self.working_directory = None
    self.options = {}
    self.today_dt = None
    self.source_file = None
    self.instance = None
    self.temp_csv = None
    self.csv = None
    ## required fields
    self.today_dt = params['run_date']
    self.name = params['name']
    self.source_file = params['source_file']
    self.description = params['description']
    self.src_implementation = params['src_implementation']
    self.working_directory = params['working_directory']
    ## optional fields with defaults
    # IDIOM FIX: compare to None with `is`, not `==` (PEP 8)
    dependencies = params.get('dependencies')
    self.dependencies = [] if dependencies is None else dependencies.split(",")
    self.dependencies.append('kicker')
    successors = params.get('successors')
    self.successors = [] if successors is None else successors.split(",")
    self.connection_type = params.get('connection_type', 'none')
    # if it hasn't run before, last_processed will be empty
    self.first_run = False
    if params.get('last_processed'):
        self.last_processed = params['last_processed']
    else:
        self.last_processed = RawDataUtilities.string_from_date(self.today_dt)
        self.first_run = True
    self.last_processed = RawDataUtilities.date_from_string(self.last_processed)
    self.num_run = int(params.get('num_run', 0))
    self.out_filename = params.get('out_filename', self.name)
    self.run_frequency = params.get('run_frequency', 'Daily')
    self.temp_results = params.get('temp_results', 'TempCSV')
    self.file_write = params.get('file_write', 'Append')
    #self.entry_type = params.get('entry_type', 'Connector')
    self.last_run = params.get('last_run', 'success')
    # IDIOM FIX: use the boolean expression directly, not `True if ... else False`
    self.instantiate_instance = params.get("instantiate", "true") == "true"
    ## parameters to pass down to the entry implementation
    self.options = params.get('non_std', {})
    self.updates['last_processed'] = RawDataUtilities.string_from_date(self.today_dt)
    self.updates['num_run'] = str(self.num_run + 1)
    self.order = 0
    self.ready = True
    if self.instantiate_instance and self.get_instance():
        self.log_message("Initialization Complete (success): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())
    else:
        if self.instantiate_instance:
            self.ready = False
        self.log_message("Initialization Complete (failure): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())
def __init__(self, params):
    """Initialize one config entry from its parameter dict.

    Required keys: run_date, name, source_file, description,
    src_implementation, working_directory.  All other keys are optional
    with the defaults noted inline.  When instantiation is enabled, the
    underlying implementation is loaded and a success/failure log record
    is emitted; failure clears ``self.ready``.
    """
    Base_Type.__init__(self)
    ## class variables
    self.updates = {}
    self.return_val = None
    self.description = None
    self.connection_type = None
    self.last_processed = None
    self.num_run = 0
    self.message = None
    self.no_run_reason = 'Schedule Not Ready'
    ### valid options... Default is Append
    # Append - Add records to the end
    # Overlay - replace the current with the new
    # New - Add a new file with the run number on the end; most current result has the name with no number
    self.file_write = None
    ## Valid Options... Default is Temp CSV file
    # Dataframe
    # ListList
    # DictionaryList
    # TempCSV
    # None
    self.temp_results = None
    self.out_filename = None
    self.src_implementation = None
    self.run_frequency = None
    ## location where results are written
    self.working_directory = None
    self.options = {}
    self.today_dt = None
    self.source_file = None
    self.instance = None
    self.temp_csv = None
    self.csv = None
    ## required fields
    self.today_dt = params['run_date']
    self.name = params['name']
    self.source_file = params['source_file']
    self.description = params['description']
    self.src_implementation = params['src_implementation']
    self.working_directory = params['working_directory']
    ## optional fields with defaults
    # IDIOM FIX: compare to None with `is`, not `==` (PEP 8)
    dependencies = params.get('dependencies')
    self.dependencies = [] if dependencies is None else dependencies.split(",")
    self.dependencies.append('kicker')
    successors = params.get('successors')
    self.successors = [] if successors is None else successors.split(",")
    self.connection_type = params.get('connection_type', 'none')
    # if it hasn't run before, last_processed will be empty
    self.first_run = False
    if params.get('last_processed'):
        self.last_processed = params['last_processed']
    else:
        self.last_processed = RawDataUtilities.string_from_date(self.today_dt)
        self.first_run = True
    self.last_processed = RawDataUtilities.date_from_string(self.last_processed)
    self.num_run = int(params.get('num_run', 0))
    self.out_filename = params.get('out_filename', self.name)
    self.run_frequency = params.get('run_frequency', 'Daily')
    self.temp_results = params.get('temp_results', 'TempCSV')
    self.file_write = params.get('file_write', 'Append')
    #self.entry_type = params.get('entry_type', 'Connector')
    self.last_run = params.get('last_run', 'success')
    # IDIOM FIX: use the boolean expression directly, not `True if ... else False`
    self.instantiate_instance = params.get("instantiate", "true") == "true"
    ## parameters to pass down to the entry implementation
    self.options = params.get('non_std', {})
    self.updates['last_processed'] = RawDataUtilities.string_from_date(self.today_dt)
    self.updates['num_run'] = str(self.num_run + 1)
    self.order = 0
    self.ready = True
    if self.instantiate_instance and self.get_instance():
        self.log_message("Initialization Complete (success): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())
    else:
        if self.instantiate_instance:
            self.ready = False
        self.log_message("Initialization Complete (failure): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())