def get_files(self, date):
        """
        Return the files for the given date that still need processing.

        The input and output index files for `date` are each read into a
        collection with compare_sets.open_read_file().  When the output
        index is empty, every input entry is a candidate.  Otherwise each
        output name is passed through self.map_file_name.map_name() and
        the mapped names are compared with the input entries using
        compare_sets.set_difference().

        Returns the empty collection unchanged when nothing is left to
        process, otherwise the result of compare_sets.sort_set().
        """

        # ---------
        # locate and read the input and output index files for this date
        # ---------
        input_path = index_file.index_file_path(self.input_index.base_dir,
                                                self.input_index.index_name,
                                                date)
        output_path = index_file.index_file_path(self.output_index.base_dir,
                                                 self.output_index.index_name,
                                                 date)

        input_set = compare_sets.open_read_file(input_path)
        output_set = compare_sets.open_read_file(output_path)

        # ---------
        # work out which entries have not been processed yet
        # ---------
        if len(output_set) > 0:
            # express the output names in the input naming scheme
            mapped_outputs = set(
                self.map_file_name.map_name(name) for name in output_set)

            # NOTE(review): presumably this yields the input entries that
            # have no mapped output -- confirm the argument order expected
            # by compare_sets.set_difference
            pending = compare_sets.set_difference(mapped_outputs, input_set)
        else:
            # nothing has been written yet, so every input is pending
            pending = input_set

        # ---------
        # sort the pending entries (skipped when there are none)
        # ---------
        if len(pending) == 0:
            return pending
        return compare_sets.sort_set(pending)
    def get_files_poes(self, date):
        """
        Return the entries of self.input_set that are not yet present in
        the output index file for the given date.

        Each input name is converted with
        self.map_file_name.map_name_out() and looked up in the output
        index; names with no match are kept.  When the output index is
        empty, self.input_set itself is used as the result.

        Returns the empty collection unchanged when nothing is left to
        process, otherwise the result of compare_sets.sort_set().
        """

        # ---------
        # locate and read the output index file for this date
        # ---------
        output_file = index_file.index_file_path(
            self.output_index.base_dir, self.output_index.index_name, date)
        output_set = compare_sets.open_read_file(output_file)

        # ---------
        # compare the output entries with the input set
        # ---------
        if len(output_set) == 0:
            # nothing has been written yet, so every input is pending
            pending = self.input_set
        else:
            pending = set([])
            for name in self.input_set:
                # a one-element set holding the converted input name
                candidate = set([self.map_file_name.map_name_out(name)])

                # an empty intersection means the name is not in the
                # output index
                overlap = compare_sets.set_intersection(candidate, output_set)
                if len(overlap) == 0:
                    pending.add(name)

        # ---------
        # sort the pending entries (skipped when there are none)
        # ---------
        if len(pending) > 0:
            return compare_sets.sort_set(pending)
        return pending
def get_file(ifile, start_time, end_time, pos, num):
    """
    Search the index files for up to num entries whose time lies between
    start_time and end_time (both inclusive).

    If pos == LAST the index files, and the lines within each file, are
    scanned in reverse order, so the latest matching entries are returned
    first.  For any other value of pos the scan runs forward from
    start_time, so the earliest matching entries are returned first.

    Parameters:
        ifile      -- index file object; must provide get_hours(),
                      get_date_string(), readlines() and get_base_dir()
        start_time -- lower bound, compared with the integer TIME_FIELD
                      of every index line
        end_time   -- upper bound, compared the same way
        pos        -- LAST to search backwards, anything else forwards
        num        -- maximum number of entries to return

    Returns:
        (file_list, date_list) -- file_list holds paths of the form
        "<base_dir>/<date_string>/<FILE_FIELD>"; date_list holds the
        matching TIME_FIELD values as they appear in the index line.
        Both lists are empty when num <= 0 or nothing matches.
    """

    file_list = []
    date_list = []

    if num <= 0:
        return file_list, date_list

    # Build the list of times whose index files have to be searched.
    # NOTE(review): the step is get_hours() * 3600, so the times are
    # presumably in seconds -- confirm against time_list().
    tlist = time_list(start_time, end_time, ifile.get_hours() * 3600)
    if pos == LAST:
        tlist.reverse()

    tlist_len = len(tlist)

    # Drop the last time when it maps to the same index file as the one
    # before it.  At least two entries are needed for that comparison; a
    # single-entry list used to raise IndexError on tlist[-2].
    if tlist_len > 1:
        if ifile.get_date_string(tlist[-1]) == ifile.get_date_string(
                tlist[-2]):
            tlist = tlist[:-1]

    # Drop the first time when it maps to the same index file as the
    # second one.  The length taken before the trim above is used on
    # purpose: a list that started with three or more entries still has
    # at least two here.
    if tlist_len > 2:
        if ifile.get_date_string(tlist[0]) == ifile.get_date_string(tlist[1]):
            tlist = tlist[1:]

    for date in tlist:
        lines = ifile.readlines(date)
        if pos == LAST:
            lines.reverse()

        for line in lines:
            sp_line = line.split()
            tval = int(sp_line[index_file_def.TIME_FIELD])
            if start_time <= tval <= end_time:
                file_list.append(
                    "%s/%s/%s" %
                    (ifile.get_base_dir(), ifile.get_date_string(date),
                     sp_line[index_file_def.FILE_FIELD]))
                date_list.append(sp_line[index_file_def.TIME_FIELD])

                # stop as soon as the requested number has been collected
                if len(file_list) == num:
                    return file_list, date_list

    return file_list, date_list
    def get_files(self, date):
        """
        Return the files for the given date that still need processing,
        considering only entries present in every input index.

        The index file of each entry in self.input_index_list is read
        for `date` and the results are combined with
        compare_sets.set_intersection().  When the output index is empty,
        that intersection is the result.  Otherwise each output name is
        passed through self.map_file_name.map_name() and the mapped names
        are compared with the intersection using
        compare_sets.set_difference().

        Returns an empty set when self.input_index_list is empty, the
        empty collection unchanged when nothing is left to process, and
        otherwise the result of compare_sets.sort_set().
        """

        # Without any input index there are no input files at all.  The
        # intersection below would otherwise never be assigned.
        if len(self.input_index_list) == 0:
            return set()

        # ---------
        # get the input list of files and output index file paths
        # ---------
        input_files = [
            index_file.index_file_path(input_index.base_dir,
                                       input_index.index_name, date)
            for input_index in self.input_index_list
        ]

        curr_output_file = index_file.index_file_path(
            self.output_index.base_dir, self.output_index.index_name, date)

        # ---------
        # open the input list of index files
        # and get the intersection of all of the input sets
        # ---------
        input_set = compare_sets.open_read_file(input_files[0])
        for curr_input_file in input_files[1:]:
            input_set = compare_sets.set_intersection(
                input_set, compare_sets.open_read_file(curr_input_file))

        # ---------
        # open the output index file and compare it with the inputs
        # ---------
        output_set = compare_sets.open_read_file(curr_output_file)

        if len(output_set) > 0:
            # express the output names in the input naming scheme
            out_new_filenames = set(
                self.map_file_name.map_name(name) for name in output_set)

            # NOTE(review): presumably this yields the input entries that
            # have no mapped output -- confirm the argument order expected
            # by compare_sets.set_difference
            unproc_filenames = compare_sets.set_difference(
                out_new_filenames, input_set)
        else:
            # nothing has been written yet, so every common input is pending
            unproc_filenames = input_set

        # ---------
        # sort the set of unprocessed files (skipped when there are none)
        # ---------
        if len(unproc_filenames) == 0:
            return unproc_filenames
        return compare_sets.sort_set(unproc_filenames)
Ejemplo n.º 5
0
    
    # Execute the command
    logf.write_time("Executing: %s\n" % command)
    if not testing:
        ret = os.system(command)
        check_os_ret(ret, 1, logf)


    # Update the output index file
    output_index = index_file.Index_file(out_dir, sys_path.Index_base, sys_path.Index_period, "w", sys_path.Index_wait)
    
    file_isvalid = 0
    hour_exists = 0
    
    if (output_index != ""):
        index_file_path = index_file.index_file_path(out_dir, sys_path.Index_base, curr_date)

        if os.path.isfile(index_file_path):
            file_isvalid = 1
            index_file_size = os.path.getsize(index_file_path)
                
        if (file_isvalid == 1 and index_file_size != 0):

            for line in fileinput.input(index_file_path):
                parts = line.split()
                file_utime = parts[1]
                file_hour = (int(file_utime)/3600) * 3600
                file_hour_str = time.strftime("%H%M", time.gmtime(file_hour))
                if(file_hour == curr_hour):
                    logf.write_time("Info: an entry for the current hour: %s already exists, not adding new entry to index file.\n" % file_hour_str)
                    hour_exists = 1