def test_amazon_pulling():
    """Check the frame built by ``pulling_logic.pulling_amazon``.

    Loads the packaged ``Amazon_githubdata.json.gz`` sample and verifies the
    column layout, ID uniqueness, the ``ReviewScore`` dtype and the row count.
    Soft failures are collected and reported together at the end; the two
    column names that are used as keys further down are asserted immediately.
    """
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    frame = pulling_logic.pulling_amazon(sample_path)
    expected_rows = 37  # the github sample data contains 37 lines
    problems = []

    # The data should have been shaped into exactly three columns.
    if len(frame.columns) != 3:
        problems.append("incorrect column amount")

    # Hard assert: "ID" is used as a key later in this test.
    assert frame.columns[0] == "ID", "ID incorrectly labeled"

    if frame.columns[1] != "ReviewText":
        problems.append("ReviewText Column incorrectly labeled")

    # Hard assert: "ReviewScore" is used as a key later in this test.
    assert frame.columns[2] == "ReviewScore", \
        "ReviewScore Column incorrectly labeled"

    # IDs must be unique (may take a long time on large data).
    if not frame['ID'].is_unique:
        problems.append("ID column values are not unique")

    # ReviewScore must be stored as float64.
    if frame['ReviewScore'].dtype != "float64":
        problems.append("Rating column not numeric")

    # Every line of the github sample must have been imported.
    if len(frame.index) != expected_rows:
        problems.append("Not All data is imported correctly")

    assert not problems, "errors occurred:\n" + "\n".join(problems)
def main():
    """Run the Vader tester command-line workflow.

    Parses the command-line arguments, loads the data set named by
    ``args.file_input``, draws ``args.amount`` random rows from it, runs the
    tests on that sample and generates the report.

    While the workflow runs, ``sys.stdout`` is replaced by a ``writer`` built
    from the original stream and the ``output.txt`` file handle (presumably
    so printed output reaches both -- confirm against ``writer``). The
    original ``sys.stdout`` is always restored, even when the run fails.

    Raises:
        ValueError: if the requested sample size is larger than the number
            of rows in the loaded data set.
    """
    saved = sys.stdout
    with open('output.txt', 'w') as fout:
        sys.stdout = writer(sys.stdout, fout)
        try:
            print("Vader tester started")
            parser = create_parser()
            args = parser.parse_args()

            print("Loading test data from '{}'".format(args.file_input))
            all_df = pulling_amazon(args.file_input)

            print("Getting {} random rows from data".format(args.amount))
            row_count = len(all_df.index)
            if row_count < args.amount:
                # ValueError is still an Exception, so existing handlers
                # that catch Exception keep working.
                raise ValueError(
                    "Sample cannot be bigger than dataset amount which is "
                    + str(row_count) + " rows")
            random_sample_df = random_sample(all_df, args.amount)
            print("Finished loading test data")

            print("Running tests")
            run_tests(random_sample_df)
            print("Finished running tests")

            print("Displaying output")
            report_generation(random_sample_df)
            print("Vader tester finished")
        finally:
            # Restore before the file is closed; otherwise a failure above
            # (argparse exit, oversized sample, ...) would leave sys.stdout
            # bound to a wrapper around a closed file.
            sys.stdout = saved
def tested_data_frame():
    """Return the packaged Amazon sample after ``run_tests`` has seen it.

    The frame is loaded with ``pulling_amazon`` and handed to
    ``testing_logic.run_tests``; the return value of ``run_tests`` is
    discarded, so any results are presumably written onto the frame in
    place -- confirm against ``testing_logic``.
    """
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    frame = pulling_amazon(sample_path)
    testing_logic.run_tests(frame)
    return frame
def amazon_data_frame_tested():
    """Return a 30-row sample of the packaged Amazon data after ``run_tests``.

    The full frame is loaded with ``pulling_amazon``, reduced via
    ``random_sample(..., 30)`` and then passed to ``run_tests``; the sampled
    frame itself is what gets returned.
    """
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    sampled = random_sample(pulling_amazon(sample_path), 30)
    run_tests(sampled)
    return sampled
def amazon_data_frame():
    """Return the packaged Amazon sample as loaded by ``pulling_amazon``."""
    sample_path = resource_filename(
        "vadertester", "json/Amazon_githubdata.json.gz")
    return pulling_amazon(sample_path)