def generateCSV_test(self):
    """
    Check that main.generateCSV packs its inputs into the expected dataframe.

    Only the dataframe returned by main.generateCSV is inspected; Pandas is
    trusted to serialise dataframes to csv correctly.
    """
    profile_ids = [1000, 1001, 1002]
    qc_names = ['x', 'y']
    reference_flags = [True, False, False]
    qc_flags = [
        [False, False, False],  # expected to end up as column 'x'
        [True, True, True],     # expected to end up as column 'y'
    ]

    # Build the frame under test first, then the hand-written reference.
    actual = main.generateCSV(
        reference_flags, qc_flags, qc_names, profile_ids, 'test')

    # One row per profile id: [reference flag, 'x' flag, 'y' flag].
    expected_rows = [
        [True, False, True],
        [False, False, True],
        [False, False, True],
    ]
    expected = pandas.DataFrame(
        expected_rows,
        index=profile_ids,
        columns=['True Flags', 'x', 'y'])

    assert_frame_equal(actual, expected, check_names=True)
def generateCSV_test(self):
    """
    Verify the dataframe assembled by main.generateCSV.

    The check stops at the returned dataframe; writing that dataframe to
    csv is assumed to be handled correctly by Pandas.
    """
    ids = [1000, 1001, 1002]
    checks = ['x', 'y']
    truth_flags = [True, False, False]
    flags_x = [False, False, False]
    flags_y = [True, True, True]

    # Same call order as before: produce the result, then the reference.
    produced = main.generateCSV(
        truth_flags, [flags_x, flags_y], checks, ids, 'test')

    header = ['True Flags', 'x', 'y']
    # Rows follow the order of ids; columns follow the order of header.
    reference = pandas.DataFrame(
        [
            [True, False, True],
            [False, False, True],
            [False, False, True],
        ],
        index=ids,
        columns=header)

    assert_frame_equal(produced, reference, check_names=True)
testNames.sort() print('{} quality control checks have been found'.format(len(testNames))) testNames = main.checkQCTestRequirements(testNames) print('{} quality control checks are able to be run:'.format( len(testNames))) for testName in testNames: print(' {}'.format(testName)) # Identify data files and create a profile list. filenames = main.readInput('datafiles.json') profiles = main.extractProfiles(filenames) data.ds.profiles = profiles print('\n{} file(s) will be read containing {} profiles'.format( len(filenames), len(profiles))) # Parallel processing. print('\nPlease wait while QC is performed\n') processFile.parallel = main.parallel_function(processFile, sys.argv[2]) parallel_result = processFile.parallel(filenames) # Recombine results truth, results, profileIDs = main.combineArrays(parallel_result) # Print summary statistics and write output file. main.printSummary(truth, results, testNames) main.generateCSV(truth, results, testNames, profileIDs, sys.argv[1]) else: print 'Please add command line arguments to name your output file and set parallelization:' print 'python AutoQC myFile 4' print 'will result in output written to results-myFile.csv, and will run the calculation parallelized across 4 cores.'
# Identify and import tests testNames = main.importQC('qctests') testNames.sort() print('{} quality control checks have been found'.format(len(testNames))) testNames = main.checkQCTestRequirements(testNames) print('{} quality control checks are able to be run:'.format(len(testNames))) for testName in testNames: print(' {}'.format(testName)) # Identify data files and create a profile list. filenames = main.readInput('datafiles.json') profiles = main.extractProfiles(filenames) data.ds.profiles = profiles print('\n{} file(s) will be read containing {} profiles'.format(len(filenames), len(profiles))) # Parallel processing. print('\nPlease wait while QC is performed\n') processFile.parallel = main.parallel_function(processFile, sys.argv[2]) parallel_result = processFile.parallel(filenames) # Recombine results truth, results, profileIDs = main.combineArrays(parallel_result) # Print summary statistics and write output file. main.printSummary(truth, results, testNames) main.generateCSV(truth, results, testNames, profileIDs, sys.argv[1]) else: print 'Please add command line arguments to name your output file and set parallelization:' print 'python AutoQC myFile 4' print 'will result in output written to results-myFile.csv, and will run the calculation parallelized across 4 cores.'