import datetime
import os
import re

def process_files(global_config, attr_definitions, input_dir, recursive=True):
    start_time = datetime.datetime.now()

    # Initialize the database session connection
    db_name = global_config["db_name"] + global_config["this_season"]
    session = DbSession.open_db_session(db_name)

    some_files_processed = False

    # Read the ignore file list config once, up front. Any files named in
    # the ignore list will be skipped (quarantined).
    ignore_filelist = read_ignore_filelist_cfg(os.path.join(input_dir, "IgnoreFiles.txt"))

    # The following regular expression will select all files that conform to
    # the file naming format Team*.txt. Build a list of all datafiles that match
    # the naming format within the directory passed in via command line
    # arguments.
    file_regex = re.compile("Team[a-zA-Z0-9_]+.txt")
    files = get_files(global_config, session, db_name, input_dir, file_regex, recursive)

    if files:
        log_msg = "files retrieved, elapsed time - %s" % (datetime.datetime.now() - start_time)
        print log_msg
        global_config["logger"].debug("%s - %s" % (process_files.__name__, log_msg))

        global_config["logger"].debug("%s - %d Files to be processed" % (process_files.__name__, len(files)))

    # Process data files
    for data_filename in files:
        # If the file is on the ignore list (quarantined), then skip it
        if os.path.basename(data_filename) in ignore_filelist:
            global_config["logger"].debug("%s - Ignoring file: %s" % (process_files.__name__, data_filename))
            continue

        # Make sure that the data file has not already been processed. We have seen cases
        # where the data file gets inserted into the list of files to be processed more than
        # once.
        file_processed = isFileProcessed(global_config, session, db_name, data_filename)
        if not file_processed:
            try:
                global_config["logger"].debug("%s - Processing file: %s" % (process_files.__name__, data_filename))
                process_file(global_config, session, attr_definitions, data_filename)
            except Exception as e:
                global_config["logger"].debug(
                    "%s - Error processing file: %s" % (process_files.__name__, data_filename)
                )
                # log the exception but continue processing other files
                log_exception(global_config["logger"], e)

            # Record the file as processed so we don't pick it up again. This sits
            # outside the try/except block so that a bogus file isn't reprocessed
            # over and over again on every run.
            DataModel.addProcessedFile(session, data_filename)
            some_files_processed = True
        else:
            global_config["logger"].debug(
                "%s - Skipping file: %s, already processed" % (process_files.__name__, data_filename)
            )

        # Commit all updates to the database
        session.commit()

    # Let the caller know whether any new files were handled on this pass
    return some_files_processed
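
# A minimal sketch of the read_ignore_filelist_cfg() helper called above.
# The on-disk format is assumed here to be one quarantined filename per
# line, with blank lines and "#" comments ignored; the real parser may
# differ. A set is returned for cheap membership tests in the loop above.
def read_ignore_filelist_cfg(filepath):
    ignore_filelist = set()
    try:
        with open(filepath) as ignore_file:
            for line in ignore_file:
                entry = line.strip()
                if entry and not entry.startswith("#"):
                    ignore_filelist.add(entry)
    except IOError:
        # No ignore file present; nothing is quarantined.
        pass
    return ignore_filelist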
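
# A minimal sketch of the isFileProcessed() helper used above, assuming a
# SQLAlchemy-style session and a hypothetical DataModel.ProcessedFiles
# mapped class with a "filename" column; the real schema lives in DataModel.
# global_config and db_name are unused in this sketch but kept to match the
# call site above.
def isFileProcessed(global_config, session, db_name, data_filename):
    match = session.query(DataModel.ProcessedFiles).filter(
        DataModel.ProcessedFiles.filename == data_filename).first()
    return match is not None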
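
# Example invocation, assuming a global_config dict shaped like the lookups
# above (db_name, this_season, logger). The db_name and season values are
# placeholders; attr_definitions would normally come from the project's
# attribute-definition config and is stubbed as None here.
if __name__ == "__main__":
    import logging
    logging.basicConfig(level=logging.DEBUG)
    test_config = {
        "db_name": "scouting",
        "this_season": "2014",
        "logger": logging.getLogger(__name__),
    }
    process_files(test_config, attr_definitions=None, input_dir="./data", recursive=True)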