def get_files(self, date):
    """
    Return the unprocessed files for the given date.

    The input and output index files for `date` are read into sets.  Each
    output name is translated with self.map_file_name.map_name() and the
    translated names are compared against the input names using
    compare_sets.set_difference().  If the output index is empty, every
    input name counts as unprocessed.

    An empty result is returned untouched; a non-empty result is returned
    as whatever compare_sets.sort_set() produces for it.
    """

    # ---------
    # locate the index files for this date (input first, then output)
    # ---------
    input_path = index_file.index_file_path(
        self.input_index.base_dir, self.input_index.index_name, date)
    output_path = index_file.index_file_path(
        self.output_index.base_dir, self.output_index.index_name, date)

    # ---------
    # read both index files
    # ---------
    input_set = compare_sets.open_read_file(input_path)
    output_set = compare_sets.open_read_file(output_path)

    # ---------
    # work out which names still need processing
    # ---------
    if len(output_set) == 0:
        # nothing has been written yet, so the whole input set is pending
        pending = input_set
    else:
        # translate the output names so they can be compared to input names
        mapped_outputs = set(
            self.map_file_name.map_name(out_name) for out_name in output_set)
        pending = compare_sets.set_difference(mapped_outputs, input_set)

    # ---------
    # only a non-empty result is sorted
    # ---------
    if len(pending) > 0:
        return compare_sets.sort_set(pending)
    return pending
def get_files_poes(self, date):
    """
    Return the names in self.input_set that have no counterpart in the
    output index file for the given date.

    Each input name is translated with self.map_file_name.map_name_out()
    and looked up in the output set through compare_sets.set_intersection();
    names whose translation is not found are collected.  If the output
    index is empty, self.input_set itself is the result.

    An empty result is returned untouched; a non-empty result is returned
    as whatever compare_sets.sort_set() produces for it.
    """

    # ---------
    # locate and read the output index file
    # ---------
    output_path = index_file.index_file_path(
        self.output_index.base_dir, self.output_index.index_name, date)
    output_set = compare_sets.open_read_file(output_path)

    # ---------
    # collect the input names that are missing from the output set
    # ---------
    if len(output_set) == 0:
        # nothing has been written yet, so the whole input set is pending
        pending = self.input_set
    else:
        pending = set([])
        for in_name in self.input_set:
            # wrap the translated name in a one-element set so it can be
            # intersected with the output set
            candidate = set([self.map_file_name.map_name_out(in_name)])
            matches = compare_sets.set_intersection(candidate, output_set)
            if len(matches) == 0:
                pending.add(in_name)

    # ---------
    # only a non-empty result is sorted
    # ---------
    if len(pending) > 0:
        return compare_sets.sort_set(pending)
    return pending
def get_file(ifile, start_time, end_time, pos, num):
    """
    Search for num files given a starting and ending time value.

    If pos == FIRST, files are taken starting from the first available
    between these two values.  If pos == LAST, files are taken starting
    from the last available between these values.

    Returns a tuple (file_list, date_list):
      file_list - "<base_dir>/<date_string>/<file field>" for each match
      date_list - the time field of each match, as read from the index line

    Both lists are empty when num <= 0 or nothing matches.  At most num
    entries are returned.
    """
    file_list = []
    date_list = []

    if num <= 0:
        return file_list, date_list

    # Starting with start_time, open consecutive index files searching for
    # files having a time between start_time and end_time.  Stop when num
    # files are found or the index file corresponding to end_time has been
    # searched.

    # Create index_file time list
    tlist = time_list(start_time, end_time, ifile.get_hours() * 3600)
    if pos == LAST:
        tlist.reverse()

    # NOTE: the length is captured before any trimming below, as in the
    # original logic.
    tlist_len = len(tlist)

    # Check that the last time in tlist corresponds to a different index
    # file than the previous one.  Two entries are needed for this
    # comparison; a single-entry list used to raise IndexError on tlist[-2].
    if tlist_len > 1:
        if ifile.get_date_string(tlist[-1]) == ifile.get_date_string(tlist[-2]):
            tlist = tlist[:-1]

    # Check that the second time in tlist corresponds to a different index
    # file than the first one.
    if tlist_len > 2:
        if ifile.get_date_string(tlist[0]) == ifile.get_date_string(tlist[1]):
            tlist = tlist[1:]

    ct = 0
    for date in tlist:
        lines = ifile.readlines(date)
        if pos == LAST:
            lines.reverse()

        for line in lines:
            sp_line = line.split()
            tval = int(sp_line[index_file_def.TIME_FIELD])

            if start_time <= tval <= end_time:
                file_list.append(
                    "%s/%s/%s" % (ifile.get_base_dir(),
                                  ifile.get_date_string(date),
                                  sp_line[index_file_def.FILE_FIELD]))
                date_list.append(sp_line[index_file_def.TIME_FIELD])
                ct = ct + 1
                if ct == num:
                    return file_list, date_list

    return file_list, date_list
def get_files(self, date):
    """
    Return the unprocessed files for the given date.

    Only names present in every input index (the intersection of all the
    input index files for `date`) are considered as input.  Each output
    name is translated with self.map_file_name.map_name() and the
    translated names are compared against the input names using
    compare_sets.set_difference().  If the output index is empty, every
    input name counts as unprocessed.

    An empty result is returned untouched; a non-empty result is returned
    as whatever compare_sets.sort_set() produces for it.
    """

    # ---------
    # get the input list of files and output index file paths
    # ---------
    input_files = [
        index_file.index_file_path(input_index.base_dir,
                                   input_index.index_name, date)
        for input_index in self.input_index_list]

    curr_output_file = index_file.index_file_path(
        self.output_index.base_dir, self.output_index.index_name, date)

    # ---------
    # open the input list of index files
    # and get the intersection of all of the input sets
    # ---------
    # Start from an empty set so that an empty input_index_list gives an
    # empty input set instead of failing on an unbound variable.
    input_set = set([])
    for ind, curr_input_file in enumerate(input_files):
        curr_set = compare_sets.open_read_file(curr_input_file)
        if ind == 0:
            input_set = curr_set
        else:
            input_set = compare_sets.set_intersection(input_set, curr_set)

    # ---------
    # open the output index file and compare it with the input set
    # ---------
    output_set = compare_sets.open_read_file(curr_output_file)

    if len(output_set) > 0:
        # translate the output names so they can be compared to input names
        out_new_filenames = set(
            self.map_file_name.map_name(name) for name in output_set)
        unproc_filenames = compare_sets.set_difference(
            out_new_filenames, input_set)
    else:
        # nothing has been written yet, so the whole input set is pending
        unproc_filenames = input_set

    if len(unproc_filenames) == 0:
        return unproc_filenames

    # ---------
    # sort the set of unprocessed files
    # ---------
    return compare_sets.sort_set(unproc_filenames)
# Execute the command logf.write_time("Executing: %s\n" % command) if not testing: ret = os.system(command) check_os_ret(ret, 1, logf) # Update the output index file output_index = index_file.Index_file(out_dir, sys_path.Index_base, sys_path.Index_period, "w", sys_path.Index_wait) file_isvalid = 0 hour_exists = 0 if (output_index != ""): index_file_path = index_file.index_file_path(out_dir, sys_path.Index_base, curr_date) if os.path.isfile(index_file_path): file_isvalid = 1 index_file_size = os.path.getsize(index_file_path) if (file_isvalid == 1 and index_file_size != 0): for line in fileinput.input(index_file_path): parts = line.split() file_utime = parts[1] file_hour = (int(file_utime)/3600) * 3600 file_hour_str = time.strftime("%H%M", time.gmtime(file_hour)) if(file_hour == curr_hour): logf.write_time("Info: an entry for the current hour: %s already exists, not adding new entry to index file.\n" % file_hour_str) hour_exists = 1