parser.add_argument('--inclusionColumnHeader', '-k',
                    help="Specify the column header of column containing file ID")
parser.add_argument('--cutoffDate', '-c',
                    help="Specify a date of extracted data limit filtering")
parser.add_argument('--exclusionDirectory', '-e',
                    help="Specify a directory of previously-generated XML files to exclude")

# Parse the command line once and reuse the resulting namespace instead of
# re-parsing the arguments for every option that is read.
args = parser.parse_args()
inputFilePath = args.inputFilePath
outputFilePath = args.outputFilePath
inclusionFilePath = args.inclusionFilePath
inclusionColumnHeader = args.inclusionColumnHeader
cutoffDate = args.cutoffDate
exclusionDirectory = args.exclusionDirectory

# These four options are required for a run; the cutoff date and the
# exclusion directory are optional.
if not (inputFilePath and outputFilePath and inclusionFilePath and inclusionColumnHeader):
    raise ValueError('You are missing an argument. Run with --help for more information.')

# Build the inclusion list from the Excel file, passing the cutoff date only
# when one was supplied on the command line.
if cutoffDate:
    incList = ListFromExcel(inclusionFilePath, inclusionColumnHeader, date=cutoffDate)
else:
    incList = ListFromExcel(inclusionFilePath, inclusionColumnHeader)

f = Filter(inputFilePath, outputFilePath)
f.inclusionListAdd(incList)

if exclusionDirectory:
    # Read the directory the user actually passed via --exclusionDirectory.
    # The value used to be ignored in favour of a hard-coded 'xmlfiles' path.
    exList = ListFromDirectory(exclusionDirectory)
    f.exclusionListAdd(exList)

f.filter()
print("Data has been extracted to the shelf file '%s'" % outputFilePath)
def run_Filter(self):
    """Run the filter step using the lists described in ``self.filter_configs``.

    Calls ``self.update_file_names('FilterComplete')``, builds a ``Filter``
    from ``self.input_file_path()`` and ``self.output_file_path()``, adds
    every inclusion/exclusion list whose key is present in
    ``self.filter_configs``, and finally calls ``filter()`` on it.

    Keys read from ``self.filter_configs`` (all optional):
        inclusionListObject / exclusionListObject: passed straight to
            ``inclusionListAdd`` / ``exclusionListAdd``.
        inclusionDirectoryPath / exclusionDirectoryPath: turned into a list
            with ``FilterLists.ListFromDirectory``.
        inclusionFilePath: turned into a list with
            ``FilterLists.ListFromExcel`` using ``'ID'`` as the second
            argument (presumably the column header -- confirm against
            ``ListFromExcel``).
        inclusionCutoffDate: forwarded as the third argument to
            ``ListFromExcel`` when present; omitted otherwise.
        exclusionURL: turned into a list with ``FilterLists.ListFromURL``.
    """
    self.update_file_names('FilterComplete')

    # Set up filter object
    f = Filter(self.input_file_path(), self.output_file_path())
    configs = self.filter_configs

    if 'inclusionListObject' in configs:
        f.inclusionListAdd(configs["inclusionListObject"])
    if 'exclusionListObject' in configs:
        f.exclusionListAdd(configs["exclusionListObject"])

    if 'inclusionDirectoryPath' in configs:
        incList = FilterLists.ListFromDirectory(configs["inclusionDirectoryPath"])
        f.inclusionListAdd(incList)

    if 'inclusionFilePath' in configs:
        # The cutoff date is optional: only forward it when configured, so a
        # config that names an inclusion file without a date no longer fails
        # with a KeyError.
        if 'inclusionCutoffDate' in configs:
            incList = FilterLists.ListFromExcel(configs["inclusionFilePath"], 'ID',
                                                configs["inclusionCutoffDate"])
        else:
            incList = FilterLists.ListFromExcel(configs["inclusionFilePath"], 'ID')
        f.inclusionListAdd(incList)

    if 'exclusionDirectoryPath' in configs:
        excList = FilterLists.ListFromDirectory(configs["exclusionDirectoryPath"])
        f.exclusionListAdd(excList)

    if 'exclusionURL' in configs:
        excList = FilterLists.ListFromURL(configs["exclusionURL"])
        f.exclusionListAdd(excList)

    f.filter()
def run_Filter(self):
    """Configure a ``Filter`` from ``self.filter_configs`` and run it.

    Steps, in order:
      1. ``self.update_file_names('FilterComplete')`` is called.
      2. A ``Filter`` is created from ``self.input_file_path()`` and
         ``self.output_file_path()``.
      3. For each recognised key present in ``self.filter_configs`` the
         matching list is added to the filter:
         ``inclusionListObject``, ``exclusionListObject``,
         ``inclusionDirectoryPath``, ``inclusionFilePath`` (with the
         optional ``inclusionCutoffDate``), ``exclusionDirectoryPath`` and
         ``exclusionURL``.
      4. ``filter()`` is called on the configured object.

    ``inclusionCutoffDate`` is only passed to ``FilterLists.ListFromExcel``
    when it is present in the configuration; without it the Excel list is
    built from the file path and the ``'ID'`` argument alone.
    """
    self.update_file_names('FilterComplete')

    # Set up filter object
    f = Filter(self.input_file_path(), self.output_file_path())
    configs = self.filter_configs

    # Pre-built list objects are handed to the filter unchanged.
    if 'inclusionListObject' in configs:
        f.inclusionListAdd(configs["inclusionListObject"])
    if 'exclusionListObject' in configs:
        f.exclusionListAdd(configs["exclusionListObject"])

    if 'inclusionDirectoryPath' in configs:
        incList = FilterLists.ListFromDirectory(
            configs["inclusionDirectoryPath"])
        f.inclusionListAdd(incList)

    if 'inclusionFilePath' in configs:
        excel_args = [configs["inclusionFilePath"], 'ID']
        # Indexing 'inclusionCutoffDate' unconditionally raised KeyError for
        # configs that name an inclusion file but no cutoff date; append the
        # date as the third positional argument only when it is configured.
        if 'inclusionCutoffDate' in configs:
            excel_args.append(configs["inclusionCutoffDate"])
        incList = FilterLists.ListFromExcel(*excel_args)
        f.inclusionListAdd(incList)

    if 'exclusionDirectoryPath' in configs:
        excList = FilterLists.ListFromDirectory(
            configs["exclusionDirectoryPath"])
        f.exclusionListAdd(excList)

    if 'exclusionURL' in configs:
        excList = FilterLists.ListFromURL(
            configs["exclusionURL"])
        f.exclusionListAdd(excList)

    f.filter()