def execute_connection(self):
    """Strip records dated before a cutoff from a CSV file.

    Required parameters (from ``self.param_dict``):
        strip_file: file to strip; assumed to live in the working directory.
        date_field: name of the date column; common formats accepted,
            recommended MM/DD/YYYY (e.g. 01/01/2015).

    Optional parameters:
        strip_criteria: "<units>,<unit>" where unit is Day/Month/Year,
            e.g. "6,Month" (default "1,Month").
        use_last_processed: "true" to anchor the cutoff at the entry's
            last_processed date instead of the run date.
        strip_date: explicit anchor date, e.g. 01/01/2015.

    Records strictly before the computed match date are written to a temp
    CSV (saved under the connector name). Returns 0 on completion.
    """
    filename = self.param_dict['strip_file']
    date_field = self.param_dict['date_field']
    units, date_unit = self.param_dict.get('strip_criteria', '1,Month').split(",")
    # BUG FIX: the original compared the *method object* `.upper` to "TRUE",
    # which is always False, so use_last_processed was silently ignored.
    # Call .upper() and use the boolean expression directly.
    use_last = self.param_dict.get("use_last_processed", "false").upper() == "TRUE"
    if use_last:
        strip_date = self.entry.last_processed
    else:
        strip_date = RawDataUtilities.date_from_string(
            self.param_dict.get(
                "strip_date",
                RawDataUtilities.string_from_date(self.entry.today_dt)))
    match_date = self.get_match_date(units, date_unit, strip_date)
    file_in = os.path.join(self.entry.working_directory, filename)
    with open(file_in) as csvfile:
        reader = csv.DictReader(csvfile)
        self.setup_csv_temp_writer(self.get_temp_csv_name(), reader.fieldnames)
        for row in reader:
            compare_date = RawDataUtilities.date_from_string(row[date_field])
            # get_diff subtracts the file's date from the match date:
            # e.g. cutoff 10/1/2014 vs record 09/1/2014 -> diff < 0,
            # so the record is older than the cutoff and gets stripped.
            diff = RawDataUtilities.get_diff(match_date, compare_date)
            if diff < 0:
                self.write_temp_rec(row)
    self.close_temp_csv()
    return 0
def execute_connection(self):
    """Strip records dated before a cutoff from a CSV file.

    Required parameters (from ``self.param_dict``):
        strip_file: file to strip; assumed to live in the working directory.
        date_field: name of the date column; common formats accepted,
            recommended MM/DD/YYYY (e.g. 01/01/2015).

    Optional parameters:
        strip_criteria: "<units>,<unit>" where unit is Day/Month/Year,
            e.g. "6,Month" (default "1,Month").
        use_last_processed: "true" to anchor the cutoff at the entry's
            last_processed date instead of the run date.
        strip_date: explicit anchor date, e.g. 01/01/2015.

    Records strictly before the computed match date are written to a temp
    CSV (saved under the connector name). Returns 0 on completion.
    """
    filename = self.param_dict["strip_file"]
    date_field = self.param_dict["date_field"]
    units, date_unit = self.param_dict.get("strip_criteria", "1,Month").split(",")
    # BUG FIX: the original compared the *method object* `.upper` to "TRUE",
    # which is always False, so use_last_processed was silently ignored.
    # Call .upper() and use the boolean expression directly.
    use_last = self.param_dict.get("use_last_processed", "false").upper() == "TRUE"
    strip_date = (
        self.entry.last_processed
        if use_last
        else RawDataUtilities.date_from_string(
            self.param_dict.get(
                "strip_date",
                RawDataUtilities.string_from_date(self.entry.today_dt))
        )
    )
    match_date = self.get_match_date(units, date_unit, strip_date)
    file_in = os.path.join(self.entry.working_directory, filename)
    with open(file_in) as csvfile:
        reader = csv.DictReader(csvfile)
        self.setup_csv_temp_writer(self.get_temp_csv_name(), reader.fieldnames)
        for row in reader:
            compare_date = RawDataUtilities.date_from_string(row[date_field])
            # get_diff subtracts the file's date from the match date:
            # e.g. cutoff 10/1/2014 vs record 09/1/2014 -> diff < 0,
            # so the record is older than the cutoff and gets stripped.
            diff = RawDataUtilities.get_diff(match_date, compare_date)
            if diff < 0:
                self.write_temp_rec(row)
    self.close_temp_csv()
    return 0
def get_min_max(self, csv_in, date_field):
    """Scan ``csv_in`` and return ``(max_date, min_date, header)``.

    ``header`` is the file's column list extended with the two synthetic
    columns ``max_<date_field>`` and ``min_<date_field>``.  Returns
    ``(None, None, [])`` for a file with no data rows.
    """
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    max_d = None
    min_d = None
    header = []
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if not header:
                # BUG FIX: on Python 3, row.keys() is a dict view with no
                # .append(); materialize it as a list first (identical
                # behavior on Python 2, where keys() already copied).
                header = list(row.keys())
                header.append(max_field)
                header.append(min_field)
            rec_date = RawDataUtilities.date_from_string(row[date_field])
            if not max_d:
                max_d = rec_date
            # NOTE(review): the 1-minute tolerance means a record within a
            # minute *below* the current max also replaces it; preserved
            # as-is -- confirm whether a strict `rec_date > max_d` was meant.
            if max_d - rec_date < datetime.timedelta(minutes=1):
                max_d = rec_date
            if not min_d:
                min_d = rec_date
            if rec_date - min_d < datetime.timedelta(minutes=1):
                min_d = rec_date
    return max_d, min_d, header
def get_min_max(self, csv_in, date_field):
    """Scan ``csv_in`` and return ``(max_date, min_date, header)``.

    ``header`` is the file's column list extended with the two synthetic
    columns ``max_<date_field>`` and ``min_<date_field>``.  Returns
    ``(None, None, [])`` for a file with no data rows.
    """
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    max_d = None
    min_d = None
    header = []
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            if not header:
                # BUG FIX: on Python 3, row.keys() is a dict view with no
                # .append(); materialize it as a list first (identical
                # behavior on Python 2, where keys() already copied).
                header = list(row.keys())
                header.append(max_field)
                header.append(min_field)
            rec_date = RawDataUtilities.date_from_string(row[date_field])
            if not max_d:
                max_d = rec_date
            # NOTE(review): the 1-minute tolerance means a record within a
            # minute *below* the current max also replaces it; preserved
            # as-is -- confirm whether a strict `rec_date > max_d` was meant.
            if max_d - rec_date < datetime.timedelta(minutes=1):
                max_d = rec_date
            if not min_d:
                min_d = rec_date
            if rec_date - min_d < datetime.timedelta(minutes=1):
                min_d = rec_date
    return max_d, min_d, header
def execute_processor(self):
    """Copy ``in_file`` to a temp CSV, stamping every record with the
    min and max values of ``date_field`` across the whole file.

    Required parameters (from ``self.param_dict``): ``in_file`` (found in
    the entry's working directory) and ``date_field``.  Returns 0.
    """
    filename = self.param_dict['in_file']
    date_field = self.param_dict['date_field']
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    csv_in = os.path.join(self.entry.working_directory, filename)
    # First pass: compute the extreme dates and the output header.
    # (The original re-scanned the file here with a second, identical
    # min/max loop that could only reproduce the same results and -- on
    # Python 3 -- crashed on row.keys().append(); that redundant pass
    # has been removed.)
    max_d, min_d, header = self.get_min_max(csv_in, date_field)
    self.setup_csv_temp_writer(self.get_temp_csv_name(), header, write_header=True)
    # Second pass: copy each record with the computed extremes attached.
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            out_dict = {max_field: max_d, min_field: min_d}
            # row's own keys are disjoint from the synthetic columns, so
            # update() preserves the original record untouched.
            out_dict.update(row)
            self.write_temp_rec(out_dict)
    self.close_temp_csv()
    return 0
def execute_processor(self):
    """Copy ``in_file`` to a temp CSV, stamping every record with the
    min and max values of ``date_field`` across the whole file.

    Required parameters (from ``self.param_dict``): ``in_file`` (found in
    the entry's working directory) and ``date_field``.  Returns 0.
    """
    filename = self.param_dict['in_file']
    date_field = self.param_dict['date_field']
    max_field = "max_" + date_field
    min_field = "min_" + date_field
    csv_in = os.path.join(self.entry.working_directory, filename)
    # First pass: compute the extreme dates and the output header.
    # (The original re-scanned the file here with a second, identical
    # min/max loop that could only reproduce the same results and -- on
    # Python 3 -- crashed on row.keys().append(); that redundant pass
    # has been removed.)
    max_d, min_d, header = self.get_min_max(csv_in, date_field)
    self.setup_csv_temp_writer(self.get_temp_csv_name(), header, write_header=True)
    # Second pass: copy each record with the computed extremes attached.
    with open(csv_in) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            out_dict = {max_field: max_d, min_field: min_d}
            # row's own keys are disjoint from the synthetic columns, so
            # update() preserves the original record untouched.
            out_dict.update(row)
            self.write_temp_rec(out_dict)
    self.close_temp_csv()
    return 0
def __init__(self, params):
    """Initialize the reader from a parameter dict.

    Recognized keys: ``run_date`` (parsed to a date when present) and
    ``entry_name`` (defaults to None).  Also prepares the two entry
    buckets and emits the startup log record.
    """
    Base_Type.__init__(self)
    if 'run_date' in params:
        self.run_date = RawDataUtilities.date_from_string(params['run_date'])
    self.entry_name = params.get('entry_name')
    self.total_time = 0.0
    # two empty buckets for entries
    for slot in (0, 1):
        self.entries[slot] = []
    #self.set_uuid(params.get('uuid','---none---'))
    self.log_message("Initialization Complete",
                     log_type='main',
                     status='running',
                     step='load configs',
                     name='config_file_reader',
                     log_level=self.log_info())
def load_filters(self):
    """Build ``self.crit_dict`` from filter-style parameters.

    A parameter named ``<criterion>_<field>`` -- where criterion is one of
    equal/gt/lt/gte/lte/in -- becomes an entry mapping the parameter name
    to its field, criterion, parsed value, and comparison function.
    Fields ending in ``_dt`` have the suffix stripped and their value
    parsed as a date; ``in`` values are comma-split lists with each
    member run through ``self.convert``.
    """
    crit_list = {
        'equal': self._equal,
        'gt': self._gt,
        'lt': self._lt,
        'gte': self._gte,
        'lte': self._lte,
        'in': self._in,
    }
    self.crit_dict = {}
    for param in self.param_dict.keys():
        # the text before the first underscore names the criterion
        criterion = param[:param.find('_')]
        if criterion not in crit_list:
            continue
        field = param[len(criterion) + 1:]
        dt = field.endswith("_dt")
        if dt:
            field = field[:-3]
        value = self.param_dict[param]
        if dt:
            value = RawDataUtilities.date_from_string(value)
        elif criterion == 'in':
            # FIX: the original called self.convert(m) twice per member;
            # `convert(m) or m` has identical truthiness semantics with
            # a single call.
            value = [self.convert(m) or m for m in value.split(",")]
        else:
            value = self.convert(value)
        self.crit_dict[param] = {
            'field': field,
            'criterion': criterion,
            'value': value,
            'function': crit_list[criterion],
        }
def load_filters(self):
    """Build ``self.crit_dict`` from filter-style parameters.

    A parameter named ``<criterion>_<field>`` -- where criterion is one of
    equal/gt/lt/gte/lte/in -- becomes an entry mapping the parameter name
    to its field, criterion, parsed value, and comparison function.
    Fields ending in ``_dt`` have the suffix stripped and their value
    parsed as a date; ``in`` values are comma-split lists with each
    member run through ``self.convert``.
    """
    crit_list = {
        'equal': self._equal,
        'gt': self._gt,
        'lt': self._lt,
        'gte': self._gte,
        'lte': self._lte,
        'in': self._in,
    }
    self.crit_dict = {}
    for param in self.param_dict.keys():
        # the text before the first underscore names the criterion
        criterion = param[:param.find('_')]
        if criterion not in crit_list:
            continue
        field = param[len(criterion) + 1:]
        dt = field.endswith("_dt")
        if dt:
            field = field[:-3]
        value = self.param_dict[param]
        if dt:
            value = RawDataUtilities.date_from_string(value)
        elif criterion == 'in':
            # FIX: the original called self.convert(m) twice per member;
            # `convert(m) or m` has identical truthiness semantics with
            # a single call.
            value = [self.convert(m) or m for m in value.split(",")]
        else:
            value = self.convert(value)
        self.crit_dict[param] = {
            'field': field,
            'criterion': criterion,
            'value': value,
            'function': crit_list[criterion],
        }
def __init__(self, params):
    """Initialize one config entry from its parameter dict.

    Required keys: run_date, name, source_file, description,
    src_implementation, working_directory.  All other keys are optional
    with the defaults noted inline.  When instantiation is enabled, the
    underlying implementation is loaded and a success/failure log record
    is emitted; failure clears ``self.ready``.
    """
    Base_Type.__init__(self)
    ## class variables
    self.updates = {}
    self.return_val = None
    self.description = None
    self.connection_type = None
    self.last_processed = None
    self.num_run = 0
    self.message = None
    self.no_run_reason = 'Schedule Not Ready'
    ### valid options... Default is Append
    # Append - Add records to the end
    # Overlay - replace the current with the new
    # New - Add a new file with the run number on the end; most current result has the name with no number
    self.file_write = None
    ## Valid Options... Default is Temp CSV file
    # Dataframe
    # ListList
    # DictionaryList
    # TempCSV
    # None
    self.temp_results = None
    self.out_filename = None
    self.src_implementation = None
    self.run_frequency = None
    ## location where results are written
    self.working_directory = None
    self.options = {}
    self.today_dt = None
    self.source_file = None
    self.instance = None
    self.temp_csv = None
    self.csv = None
    ## required fields
    self.today_dt = params['run_date']
    self.name = params['name']
    self.source_file = params['source_file']
    self.description = params['description']
    self.src_implementation = params['src_implementation']
    self.working_directory = params['working_directory']
    ## optional fields with defaults
    # IDIOM FIX: compare to None with `is`, not `==` (PEP 8)
    dependencies = params.get('dependencies')
    self.dependencies = [] if dependencies is None else dependencies.split(",")
    self.dependencies.append('kicker')
    successors = params.get('successors')
    self.successors = [] if successors is None else successors.split(",")
    self.connection_type = params.get('connection_type', 'none')
    # if it hasn't run before, last_processed will be empty
    self.first_run = False
    if params.get('last_processed'):
        self.last_processed = params['last_processed']
    else:
        self.last_processed = RawDataUtilities.string_from_date(self.today_dt)
        self.first_run = True
    self.last_processed = RawDataUtilities.date_from_string(self.last_processed)
    self.num_run = int(params.get('num_run', 0))
    self.out_filename = params.get('out_filename', self.name)
    self.run_frequency = params.get('run_frequency', 'Daily')
    self.temp_results = params.get('temp_results', 'TempCSV')
    self.file_write = params.get('file_write', 'Append')
    #self.entry_type = params.get('entry_type', 'Connector')
    self.last_run = params.get('last_run', 'success')
    # IDIOM FIX: use the boolean expression directly, not `True if ... else False`
    self.instantiate_instance = params.get("instantiate", "true") == "true"
    ## parameters to pass down to the entry implementation
    self.options = params.get('non_std', {})
    self.updates['last_processed'] = RawDataUtilities.string_from_date(self.today_dt)
    self.updates['num_run'] = str(self.num_run + 1)
    self.order = 0
    self.ready = True
    if self.instantiate_instance and self.get_instance():
        self.log_message("Initialization Complete (success): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())
    else:
        if self.instantiate_instance:
            self.ready = False
        self.log_message("Initialization Complete (failure): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())
def __init__(self, params):
    """Initialize one config entry from its parameter dict.

    Required keys: run_date, name, source_file, description,
    src_implementation, working_directory.  All other keys are optional
    with the defaults noted inline.  When instantiation is enabled, the
    underlying implementation is loaded and a success/failure log record
    is emitted; failure clears ``self.ready``.
    """
    Base_Type.__init__(self)
    ## class variables
    self.updates = {}
    self.return_val = None
    self.description = None
    self.connection_type = None
    self.last_processed = None
    self.num_run = 0
    self.message = None
    self.no_run_reason = 'Schedule Not Ready'
    ### valid options... Default is Append
    # Append - Add records to the end
    # Overlay - replace the current with the new
    # New - Add a new file with the run number on the end; most current result has the name with no number
    self.file_write = None
    ## Valid Options... Default is Temp CSV file
    # Dataframe
    # ListList
    # DictionaryList
    # TempCSV
    # None
    self.temp_results = None
    self.out_filename = None
    self.src_implementation = None
    self.run_frequency = None
    ## location where results are written
    self.working_directory = None
    self.options = {}
    self.today_dt = None
    self.source_file = None
    self.instance = None
    self.temp_csv = None
    self.csv = None
    ## required fields
    self.today_dt = params['run_date']
    self.name = params['name']
    self.source_file = params['source_file']
    self.description = params['description']
    self.src_implementation = params['src_implementation']
    self.working_directory = params['working_directory']
    ## optional fields with defaults
    # IDIOM FIX: compare to None with `is`, not `==` (PEP 8)
    dependencies = params.get('dependencies')
    self.dependencies = [] if dependencies is None else dependencies.split(",")
    self.dependencies.append('kicker')
    successors = params.get('successors')
    self.successors = [] if successors is None else successors.split(",")
    self.connection_type = params.get('connection_type', 'none')
    # if it hasn't run before, last_processed will be empty
    self.first_run = False
    if params.get('last_processed'):
        self.last_processed = params['last_processed']
    else:
        self.last_processed = RawDataUtilities.string_from_date(self.today_dt)
        self.first_run = True
    self.last_processed = RawDataUtilities.date_from_string(self.last_processed)
    self.num_run = int(params.get('num_run', 0))
    self.out_filename = params.get('out_filename', self.name)
    self.run_frequency = params.get('run_frequency', 'Daily')
    self.temp_results = params.get('temp_results', 'TempCSV')
    self.file_write = params.get('file_write', 'Append')
    #self.entry_type = params.get('entry_type', 'Connector')
    self.last_run = params.get('last_run', 'success')
    # IDIOM FIX: use the boolean expression directly, not `True if ... else False`
    self.instantiate_instance = params.get("instantiate", "true") == "true"
    ## parameters to pass down to the entry implementation
    self.options = params.get('non_std', {})
    self.updates['last_processed'] = RawDataUtilities.string_from_date(self.today_dt)
    self.updates['num_run'] = str(self.num_run + 1)
    self.order = 0
    self.ready = True
    if self.instantiate_instance and self.get_instance():
        self.log_message("Initialization Complete (success): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())
    else:
        if self.instantiate_instance:
            self.ready = False
        self.log_message("Initialization Complete (failure): " + self.name,
                         log_type='entry', status='running',
                         step='load configs', name='config_entry',
                         log_level=self.log_info())