def test_bids_df_anat(download_data_testing_test_files, loader_parameters):
    """Test BidsDataframe for MRI anat nii.gz file format.

    Covers: no file extensions provided, multiple target_suffix, and
    behavior when "roi_suffix" is not None.
    """
    bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    # Drop the machine-specific 'path' column and normalize row order so the
    # dataframe compares deterministically against the reference CSV.
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_ref.csv")
    csv_test = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    # Context managers close both handles deterministically (they were
    # previously opened inline and leaked).
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {
        'added': [],
        'removed': [],
        'changed': [],
        'columns_added': [],
        'columns_removed': []
    }
def test_bids_df_no_validate(download_data_testing_test_files, loader_parameters):
    """Test BidsDataframe for ct-scan nii.gz file format.

    Covers: validate_BIDS set to False for the loader.
    """
    path_data = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0])
    # Rename files so the loader won't pick them up if validate_BIDS is true
    Path(path_data, "sub-spleen2").rename(Path(path_data, "ssub-spleen2"))
    try:
        bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
        # Drop the machine-specific 'path' column and normalize row order.
        df_test = bids_df.df.drop(columns=['path'])
        df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
        csv_ref = Path(path_data, "df_ref.csv")
        csv_test = Path(path_data, "df_test.csv")
        df_test.to_csv(csv_test, index=False)
        with open(csv_ref) as f_ref, open(csv_test) as f_test:
            diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                    csv_diff.load_csv(f_test))
    finally:
        # Restore the original directory name even if the loader or the
        # comparison raises; otherwise later tests see a broken dataset.
        Path(path_data, "ssub-spleen2").rename(Path(path_data, "sub-spleen2"))
    assert diff == {
        'added': [],
        'removed': [],
        'changed': [],
        'columns_added': [],
        'columns_removed': []
    }
def test_bids_df_microscopy_png(download_data_testing_test_files, loader_parameters):
    """Test BidsDataframe for microscopy png file format.

    Covers: _sessions.tsv and _scans.tsv files, target_suffix as a nested
    list, and when no contrast_params are provided.
    """
    bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    # Drop the machine-specific 'path' column and normalize row order so the
    # dataframe compares deterministically against the reference CSV.
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_ref.csv")
    csv_test = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    # Context managers close both handles deterministically instead of
    # leaking them.
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {
        'added': [],
        'removed': [],
        'changed': [],
        'columns_added': [],
        'columns_removed': []
    }
def test_bids_df_anat(loader_parameters):
    """Test create_bids_dataframe for MRI anat nii.gz file format.

    Covers: no file extensions provided and multiple target_suffix.
    """
    loader_params = loader_parameters
    # Use every contrast listed under "training_validation" as the
    # contrast list fed to the dataframe builder.
    loader_params["contrast_params"]["contrast_lst"] = loader_params[
        "contrast_params"]["training_validation"]
    bids_path = loader_params["bids_path"]
    derivatives = True
    df_test = imed_loader_utils.create_bids_dataframe(loader_params, derivatives)
    # Drop machine-specific columns and normalize row order so the
    # dataframe compares deterministically against the reference CSV.
    df_test = df_test.drop(columns=['path', 'parent_path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(bids_path, "df_ref.csv")
    csv_test = os.path.join(bids_path, "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    # Close the CSV handles deterministically instead of leaking them.
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {
        'added': [],
        'removed': [],
        'changed': [],
        'columns_added': [],
        'columns_removed': []
    }
def test_bids_df_microscopy_png(loader_parameters):
    """Test create_bids_dataframe for microscopy png file format.

    Covers: _sessions.tsv and _scans.tsv files, target_suffix as a nested
    list, and when no contrast_params are provided.
    """
    loader_params = loader_parameters
    # Use every contrast listed under "training_validation" as the
    # contrast list fed to the dataframe builder.
    loader_params["contrast_params"]["contrast_lst"] = loader_params[
        "contrast_params"]["training_validation"]
    bids_path = loader_params["bids_path"]
    derivatives = True
    df_test = imed_loader_utils.create_bids_dataframe(loader_params, derivatives)
    # Drop machine-specific columns and normalize row order so the
    # dataframe compares deterministically against the reference CSV.
    df_test = df_test.drop(columns=['path', 'parent_path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(bids_path, "df_ref.csv")
    csv_test = os.path.join(bids_path, "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    # Close the CSV handles deterministically instead of leaking them.
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {
        'added': [],
        'removed': [],
        'changed': [],
        'columns_added': [],
        'columns_removed': []
    }
def test_columns_and_rows_changed():
    """human_text should report column, change, add, and remove sections together."""
    previous = load_csv(io.StringIO(SEVEN), key="id")
    current = load_csv(io.StringIO(EIGHT), key="id")
    diff = compare(previous, current)
    expected = dedent("""
        2 columns added, 1 column removed, 1 row changed, 1 row added, 1 row removed

        2 columns added

          age
          length

        1 column removed

          weight

        1 row changed

          id: 3
            name: "Bailey" => "Bailee"

        1 row added

          id: 4
          name: Bob
          age: 7
          length: 422

        1 row removed

          id: 1
          name: Cleo
          weight: 48
        """).strip()
    assert human_text(diff, "id") == expected
def compare_csv(file1, file2):
    """Return the csv-diff of two CSV files keyed on the "Name" column.

    :param file1: path to the first (old) CSV file
    :param file2: path to the second (new) CSV file
    :return: csv_diff comparison dict (added/removed/changed/columns_*)
    """
    # Context managers close both files even if load_csv/compare raises
    # (they were previously opened inline and leaked).
    with open(file1) as f1, open(file2) as f2:
        diff = compare(
            load_csv(f1, key="Name"),
            load_csv(f2, key="Name"),
        )
    return diff
def test_row_changed_and_row_added_and_row_deleted():
    "Should have headers for each section here"
    old_rows = load_csv(io.StringIO(ONE), key="id")
    new_rows = load_csv(io.StringIO(SIX), key="id")
    diff = compare(old_rows, new_rows)
    expected = dedent("""
        1 row changed, 1 row added, 1 row removed

        1 row changed

          id: 1
            age: "4" => "5"

        1 row added

          id: 3
          name: Bailey
          age: 1

        1 row removed

          id: 2
          name: Pancakes
          age: 2
        """).strip()
    assert human_text(diff, "id") == expected
def test_row_changed():
    """A single changed cell should render as one 'row changed' section."""
    old_rows = load_csv(io.StringIO(ONE), key="id")
    new_rows = load_csv(io.StringIO(TWO), key="id")
    diff = compare(old_rows, new_rows)
    expected = dedent("""
        1 row changed

          id: 1
            age: "4" => "5"
        """).strip()
    assert human_text(diff, "id") == expected
def test_row_removed():
    """A deleted row should render as one 'row removed' section."""
    old_rows = load_csv(io.StringIO(TWO), key="id")
    new_rows = load_csv(io.StringIO(THREE), key="id")
    diff = compare(old_rows, new_rows)
    expected = dedent("""
        1 row removed

          id: 2
          name: Pancakes
          age: 2
        """).strip()
    assert human_text(diff, "id") == expected
def test_columns_changed():
    """A column rename surfaces only in columns_added/columns_removed."""
    old_rows = load_csv(io.StringIO(SIX), key="id")
    new_rows = load_csv(io.StringIO(SEVEN), key="id")
    diff = compare(old_rows, new_rows)
    expected = {
        "changed": [],
        "removed": [],
        "added": [],
        "columns_added": ["weight"],
        "columns_removed": ["age"],
    }
    assert diff == expected
def test_single_csv_parser(tmp_dir):
    """End-to-end check of the CSV parser: process the input resources and
    compare every produced CSV (and the XML report) against golden files.
    """
    resources_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 "resources")
    input_dir = os.path.join(resources_dir, "input")
    golden_dir = os.path.join(resources_dir, "golden")
    # output_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output")  # For debugging purposes
    output_dir = tmp_dir

    def _assert_csv_files_match(ref_path, out_path):
        # Compare two CSV files with csv-diff; handles are closed via `with`
        # (they were previously opened inline and leaked).
        with open(ref_path) as ref_file, open(out_path) as out_file:
            _diff = compare(load_csv(ref_file), load_csv(out_file))
        assert_csv_diff(_diff)

    with open(os.path.join(input_dir, "test.csv")) as f:
        input_statements_csv = load_csv(f)
    process(input_dir, output_dir, ["csv"], False)
    with open(os.path.join(output_dir, "statements.csv")) as f:
        output_statements_csv = load_csv(f)
    # The statements written out must round-trip the input unchanged.
    diff = compare(input_statements_csv, output_statements_csv)
    assert_csv_diff(diff)

    # Each generated table must match its golden counterpart.
    for name in ("app8-part4-1.csv", "app8-part1.csv", "app5-table2.csv"):
        _assert_csv_files_match(os.path.join(golden_dir, name),
                                os.path.join(output_dir, name))

    # The XML report must be byte-equivalent to the golden one.
    golden = os.path.join(golden_dir, "dec50_2020_data.xml")
    output = os.path.join(output_dir, "dec50_2020_data.xml")
    assert len(main.diff_files(golden, output)) == 0
def test_row_removed():
    """A deleted row appears in 'removed' with all of its fields."""
    old_rows = load_csv(io.StringIO(TWO), key="id")
    new_rows = load_csv(io.StringIO(THREE), key="id")
    diff = compare(old_rows, new_rows)
    expected = {
        "changed": [],
        "removed": [{"age": "2", "id": "2", "name": "Pancakes"}],
        "added": [],
        "columns_added": [],
        "columns_removed": [],
    }
    assert diff == expected
def test_columns_changed():
    """human_text should render column additions and removals as sections."""
    old_rows = load_csv(io.StringIO(SIX), key="id")
    new_rows = load_csv(io.StringIO(SEVEN), key="id")
    diff = compare(old_rows, new_rows)
    expected = dedent("""
        1 column added, 1 column removed

        1 column added

          weight

        1 column removed

          age
        """).strip()
    assert human_text(diff, "id") == expected
def test_split_csv(tmpdir):
    """split_csv should split source_01.csv on 'language_code' into two files
    that match the expected_x/expected_y fixtures exactly.
    """
    source = join(DATA_PATH, "source_01.csv")
    dest_path = tmpdir.mkdir("dest")
    destinations = (
        dest_path.join("actual_x_01.csv"),
        dest_path.join("actual_y_01.csv"),
    )
    split_csv(
        source,
        var_name="language_code",
        separator="::",
        destinations=destinations,
    )
    expected_names = ("expected_x_01.csv", "expected_y_01.csv")
    for expected_name, actual in zip(expected_names, destinations):
        # Context managers close both handles deterministically instead of
        # leaking them.
        with open(join(DATA_PATH, expected_name)) as exp_file, \
                open(actual) as act_file:
            diff = compare(
                load_csv(exp_file, key="id"),
                load_csv(act_file, key="id"),
            )
        # An empty diff means every section ('added', 'removed', ...) is falsy.
        assert not any(diff.values())
def test_bids_df_ctscan(download_data_testing_test_files, loader_parameters):
    """Test BidsDataframe for ct-scan nii.gz file format.

    Covers: dataset_description.json not present in derivatives folder.
    """
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__,
                                              derivatives=True)
    # Drop the machine-specific 'path' column and normalize row order so the
    # dataframe compares deterministically against the reference CSV.
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    # Close the CSV handles deterministically instead of leaking them.
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {'added': [], 'removed': [], 'changed': [],
                    'columns_added': [], 'columns_removed': []}
def test_bids_df_multi(download_data_testing_test_files, loader_parameters):
    """Test BidsDataframe with multiple folders in path_data."""
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__,
                                              derivatives=True)
    # Drop the machine-specific 'path' column and normalize row order so the
    # dataframe compares deterministically against the reference CSV.
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0],
                           "df_ref_multi.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0],
                            "df_test_multi.csv")
    df_test.to_csv(csv_test, index=False)
    # Close the CSV handles deterministically instead of leaking them.
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {'added': [], 'removed': [], 'changed': [],
                    'columns_added': [], 'columns_removed': []}
def test_row_changed_show_unchanged():
    """With show_unchanged=True the unmodified fields are listed too."""
    diff = compare(
        load_csv(io.StringIO(ONE), key="id"),
        load_csv(io.StringIO(TWO), key="id"),
        show_unchanged=True,
    )
    expected = dedent("""
        1 row changed

          id: 1
            age: "4" => "5"

            Unchanged:
              name: "Cleo"
        """).strip()
    assert human_text(diff, "id") == expected
def test_row_changed():
    """A single changed cell appears under 'changed' as [old, new]."""
    old_rows = load_csv(io.StringIO(ONE), key="id")
    new_rows = load_csv(io.StringIO(TWO), key="id")
    diff = compare(old_rows, new_rows)
    expected = {
        "added": [],
        "removed": [],
        "changed": [{"key": "1", "changes": {"age": ["4", "5"]}}],
        "columns_added": [],
        "columns_removed": [],
    }
    assert diff == expected
def test_no_key():
    """Without a key column, a modified row shows as one add plus one remove."""
    old_rows = load_csv(io.StringIO(NINE))
    new_rows = load_csv(io.StringIO(TEN))
    diff = compare(old_rows, new_rows)
    expected = dedent("""
        1 row added, 1 row removed

        1 row added

          id: 2
          name: Pancakes
          age: 3

        1 row removed

          id: 2
          name: Pancakes
          age: 4
        """).strip()
    assert human_text(diff) == expected
def is_different():
    """Return True if any CSV in DIRNAME is new or has rows added/removed
    relative to its counterpart in ARCHIVE_DIRNAME.

    Column-only and in-place cell changes are deliberately ignored: only
    the 'added' and 'removed' sections of the diff are inspected, matching
    the original behavior.
    """
    print('Diffing files...\n')
    for filename in os.listdir(DIRNAME):
        archive_path = os.path.join(ARCHIVE_DIRNAME, filename)
        if not os.path.exists(archive_path):
            # A file with no archived counterpart is by definition different.
            return True
        print(filename)
        # Context managers close both handles deterministically (they were
        # previously opened inline and leaked).
        with open(os.path.join(DIRNAME, filename), encoding='utf8') as current_file, \
                open(archive_path, encoding='utf8') as archive_file:
            diff = compare(load_csv(current_file), load_csv(archive_file))
        print(diff)
        print()
        if diff['added'] or diff['removed']:
            return True
    return False
def test_rows_added():
    """Several new rows are grouped under a single 'rows added' section."""
    old_rows = load_csv(io.StringIO(THREE), key="id")
    new_rows = load_csv(io.StringIO(FIVE), key="id")
    diff = compare(old_rows, new_rows)
    expected = dedent("""
        3 rows added

          id: 2
          name: Pancakes
          age: 2

          id: 3
          name: Bailey
          age: 1

          id: 4
          name: Carl
          age: 7
        """).strip()
    assert human_text(diff, "id") == expected
def test_bids_df_microscopy_png(loader_parameters):
    """Test BidsDataframe for microscopy png file format.

    Covers: _sessions.tsv and _scans.tsv files, target_suffix as a nested
    list, and when no contrast_params are provided.
    """
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__,
                                              derivatives=True)
    df_test = bids_df.df.drop(columns=['path'])
    # TODO: modify df_ref.csv file in data-testing dataset to include "participant_id"
    # and "sample_id" columns, then delete next line
    df_test = df_test.drop(columns=['participant_id', 'sample_id'])
    # Normalize row order so the dataframe compares deterministically.
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    # Close the CSV handles deterministically instead of leaking them.
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {'added': [], 'removed': [], 'changed': [],
                    'columns_added': [], 'columns_removed': []}
def Diff(email=True):
    """Diff each temp CSV against its archived scan and report changes.

    For every ``*.csv`` in ``temp``: if an archived copy exists in
    ``scanpath``, compare on the "domain-name" key and email (or print)
    the parsed changes; otherwise treat it as a new domain and email the
    whole file.

    :param email: when True, send results by Mail; when False, print them
    """
    # Structural "no changes" value — comparing against this is robust,
    # unlike the original comparison against the dict's string repr.
    empty_diff = {'added': [], 'removed': [], 'changed': [],
                  'columns_added': [], 'columns_removed': []}
    for entry in os.listdir(temp):
        if not entry.endswith(".csv"):
            continue
        tfile = os.path.join(temp, entry)
        sfile = os.path.join(scanpath, entry)
        domain = entry.replace(".csv", "")
        if os.path.exists(sfile):
            # Context managers close both handles deterministically (they
            # were previously opened inline and leaked).
            with open(tfile) as new_file, open(sfile) as old_file:
                diff = compare(load_csv(new_file, key="domain-name"),
                               load_csv(old_file, key="domain-name"))
            if diff != empty_diff:
                data = ParseData(diff)
                if email:
                    Mail(domain=domain, new=False, data=data)
                else:
                    print(data)
        else:
            # New domain, email the csv file
            Mail(domain, True, tfile)
def diff_check(supplier):
    """Compare the two most recent data files for *supplier* and delete the
    newest one when the diff reports no additions/removals.

    Files in ./data/ are sorted newest-first by mtime; the first two whose
    names contain *supplier* are compared on the "plan_id" key.
    """
    files = sorted([x for x in os.listdir("./data/") if x.endswith(".csv")],
                   key=lambda x: os.path.getmtime("./data/" + x),
                   reverse=True)
    if len(files) < 2:
        email_error.send_email("not enough files to compare")
        return
    # Find the newest ("now") and second-newest ("recent") files for this
    # supplier. As in the original, a supplier with fewer than two matching
    # files raises NameError below — TODO confirm whether that should be
    # reported via email_error instead.
    for i in range(len(files)):
        if supplier in str(files[i]):
            now = files[i]
            for j in range(i + 1, len(files)):
                if supplier in str(files[j]):
                    recent = files[j]
                    break
            break
    # Context managers close both handles deterministically (they were
    # previously opened inline and leaked).
    with open("./data/" + now) as now_file, \
            open("./data/" + recent) as recent_file:
        diff = compare(load_csv(now_file, key="plan_id"),
                       load_csv(recent_file, key="plan_id"))
    # NOTE(review): `or` means the new file is deleted if EITHER section is
    # empty; `and` (both empty) looks like the intent — confirm before changing.
    if diff['added'] == [] or diff['removed'] == []:
        os.remove("./data/" + now)
        print('deleted')
def test_bids_df_anat(loader_parameters):
    """Test BidsDataframe for MRI anat nii.gz file format.

    Covers: no file extensions provided and multiple target_suffix.
    TODO: modify test and "df_ref.csv" file in data-testing dataset to test
    behavior when "roi_suffix" is not None.
    """
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__,
                                              derivatives=True)
    df_test = bids_df.df.drop(columns=['path'])
    # TODO: modify df_ref.csv file in data-testing dataset to include "participant_id"
    # column then delete next line
    df_test = df_test.drop(columns=['participant_id'])
    # Normalize row order so the dataframe compares deterministically.
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    # Close the CSV handles deterministically instead of leaking them.
    with open(csv_ref) as f_ref, open(csv_test) as f_test:
        diff = csv_diff.compare(csv_diff.load_csv(f_ref),
                                csv_diff.load_csv(f_test))
    assert diff == {'added': [], 'removed': [], 'changed': [],
                    'columns_added': [], 'columns_removed': []}
def diff(self):
    """Run csv-diff between the local copy and the git copy and return it."""
    result = compare(self.local, self.git)
    return result
def diff(file1, file2, k):
    """Return the csv-diff delta between two CSV files keyed on column *k*.

    :param file1: path to the first (old) CSV file
    :param file2: path to the second (new) CSV file
    :param k: name of the key column used to match rows
    :return: csv_diff comparison dict, including unchanged fields
    """
    # Context managers close both files even on error (they were previously
    # opened inline and leaked). The bare positional `True` is the
    # show_unchanged flag — name it for clarity.
    with open(file1) as f1, open(file2) as f2:
        delta = compare(load_csv(f1, key=k),
                        load_csv(f2, key=k),
                        show_unchanged=True)
    return delta
# Script: diff a roster CSV against an export CSV (keyed on "ID") and write
# the students present in the export but missing from the roster to a new CSV.
from csv_diff import load_csv, compare
import pprint
import csv

roster = input("Enter path to main csv file: ")
export = input("Enter path to comparative csv file: ")
# Saves the comparison data from the designated .csv files in a dictionary
diff = compare(load_csv(open(roster.strip()), key="ID"),
               load_csv(open(export.strip()), key="ID"))
# 'added' = rows only in the export; 'removed' = rows only in the roster.
added = diff['added']
removed = diff['removed']
pprint.pprint(diff)
# pprint.pprint(added)
# pprint.pprint(removed)
print()
with open('/Users/laptop/Desktop/MissingFromRoster.csv', 'w') as file:
    csvWriter = csv.writer(file)
    # Writing the file header
    # NOTE(review): crashes with IndexError when `added` is empty — confirm
    # whether an empty diff should be handled before indexing added[0].
    numOfStudentKeys = len(added[0].keys())
    studentKeys = list(added[0].keys())
    csvWriter.writerow(studentKeys)
    print(('*' * 20) + '\n' + str(len(added)) + " " + 'Missing From the Roster' + '\n')
    print(('*' * 20))
    # Writing student data to file
    for student in added:
        # NOTE(review): the loop builds studentsValues but never writes it —
        # a csvWriter.writerow(studentsValues) appears to be missing; the
        # source may be truncated here. Confirm against the original file.
        studentsValues = list(student.values())
def get_suppliers(zipcode):
    """
    This function loops through each of the companies returned from the get_distribution_companies and gets the supplier's list for each of them
    :param zipcode: zipcode
    :return: A csv file is saved, returns a Bool value if the zipcode scrape was successful or not
    """
    # Iterates over all the TDUs for that particular zipcode
    # type(dist_company) is Python Dictionary
    timestamp_start = datetime.today()
    for dist_company in get_distribution_companies(zipcode=zipcode):
        # TDU ID - internal
        company_id = dist_company['distributionCompanyId']
        # TDU Name
        company_name = dist_company['distributionCompanyName']
        # TDU is municipal
        is_municipal = dist_company['isMunicipalElectricCompany']
        # Checking if TDU is municipal, if not then proceed ...
        if not is_municipal:
            post_data = dict(customerClassId="1",
                             distributionCompanyId=str(company_id),
                             distributionCompanyName=str(company_name),
                             monthlyUsage=600,
                             zipCode=zipcode)
            # Making the second request, now to get the supplier's list for that particular TDU
            r2 = requests.post(
                "http://www.energyswitchma.gov/consumers/compare",
                data=post_data)
            suppliers_list = json.loads(r2.text)
            # Creating a Dataframe (a table like format) for easier analysis and exporting to CSV
            df = pd.DataFrame.from_dict(suppliers_list)
            # Mentioning the columns we want
            # NOTE(review): 'introductoryPrice' is listed twice — presumably
            # so it can be mapped to both Introductory_Rate and
            # Introductory_Price_Value below; confirm this is intentional.
            df = df[[
                'supplierName', 'pricingStructureDescription', 'pricePerMonth',
                'pricePerUnit', 'introductoryPrice', 'introductoryPrice',
                'enrollmentFee', 'contractTerm', 'earlyTerminationDetailExport',
                'hasAutomaticRenewal', 'automaticRenewalDetail',
                'renewableEnergyProductPercentage',
                'renewableEnergyProductDetail', 'otherProductServicesDetail',
                'isDistributionCompany', 'estimatedCost', 'otherProductServices'
            ]]
            # Adding zipcode column to the Dataframe
            df["Zipcode"] = zipcode
            # Adding timestamp column to the Dataframe
            df["Date_Downloaded"] = timestamp_start.strftime('%m/%d/%y %H:%M')
            df['TDU_Service_Territory'] = company_name
            # Change column/header names as per convention
            # (positional: must stay aligned with the column selection above)
            df.columns = [
                'Supplier_Name', 'Rate_Type', 'Fixed_Charge', 'Variable_Rate',
                'Introductory_Rate', 'Introductory_Price_Value',
                'Enrollment_Fee', 'Contract_Term', 'Early_Termination_Fee',
                'Automatic_Renewal_Type', 'Automatic_Renewal_Detail',
                'Percent_Renewable', 'Renewable_Description',
                'Incentives_Special_Terms', 'Incumbent_Flag', 'Estimated_Cost',
                'Other_Product_Services', 'Zipcode', 'Date_Downloaded',
                'TDU_Service_Territory'
            ]
            # Modifying variable rate column to convert to dollars
            df['Variable_Rate'] = df['Variable_Rate'].apply(
                convert_cents_to_dollars)
            # Adding the introductory_rate column based on introductory_rate_value column
            df['Introductory_Rate'] = df['Introductory_Price_Value'].apply(
                lambda x: True if x else False)
            timestamp_filename_format = timestamp_start.strftime(
                '%m%d%y_%H_%M_%S')
            zipcode_filename = f'results_MA/{zipcode}_CI{company_id}_{timestamp_filename_format}.csv'
            file_zipcode_ci = glob.glob(
                f'results_MA/{zipcode}_CI{company_id}*.csv')
            # If this is not the first time the file was written, check for plan updates
            if len(file_zipcode_ci) > 0:
                # Identify the most recent filename
                date_regex = re.compile("\d{6}")
                date_num_list = []
                for row in file_zipcode_ci:
                    # First 6-digit run in the name is MMDDYY; rearranged to
                    # YYMMDD so integer comparison orders chronologically.
                    date = date_regex.findall(row)[0]
                    date_num = int(date[4:6] + date[0:4])
                    date_num_list.append(date_num)
                filename_prev = file_zipcode_ci[date_num_list.index(
                    max(date_num_list))]
                # Check whether there have been any plan updates since the last data pull
                # Round-trip the new frame through CSV so both sides get
                # identical formatting before normalization.
                df.to_csv("results_MA/trash.csv", index=False,
                          float_format="%.5f")
                df_new = pd.read_csv("results_MA/trash.csv")
                # NOTE(review): pd.np was removed in pandas>=1.0/2.0 — this
                # code presumably targets an older pandas; confirm pin.
                df_new.__delitem__('Date_Downloaded')
                df_new.__delitem__('Zipcode')
                df_new.fillna(value=pd.np.nan, inplace=True)
                df_new = df_new.replace(r'^\s*$', pd.np.nan, regex=True)
                df_new = df_new.replace('\\n|\\r|\s', '', regex=True)
                # Build a synthetic key from every cell so csv-diff can match
                # rows without a natural primary key.
                df_new['Key'] = df_new.apply(
                    lambda row: '_'.join(row.values.astype(str)), axis=1)
                df_new.to_csv("results_MA/trash.csv", index=False,
                              float_format="%.5f")
                df_previous = pd.read_csv(filename_prev)
                df_previous.__delitem__('Date_Downloaded')
                df_previous.__delitem__('Zipcode')
                df_previous.fillna(value=pd.np.nan, inplace=True)
                df_previous = df_previous.replace(r'^\s*$', pd.np.nan,
                                                  regex=True)
                df_previous = df_previous.replace('\\n|\\r|\s', '', regex=True)
                df_previous['Key'] = df_previous.apply(
                    lambda row: '_'.join(row.values.astype(str)), axis=1)
                df_previous.to_csv("results_MA/trash_old.csv", index=False,
                                   float_format="%.5f")
                diff = compare(
                    load_csv(open("results_MA/trash_old.csv"), key="Key"),
                    load_csv(open("results_MA/trash.csv"), key="Key"))
                #df_previous = pd.read_csv(file_zipcode_ci[0], float_precision='round_trip')
                #df_new = pd.read_csv("results_MA/trash.csv", float_precision='round_trip')
                if (diff['added'] == []) and (diff['removed'] == []) and (diff['changed'] == []):
                    print("\t Previously scraped, no updates found.")
                else:
                    print("\tWriting to a new file: ", zipcode_filename)
                    df.to_csv(zipcode_filename, index=False,
                              float_format="%.5f")
                    print("\t Updating tracking ...")
                    update_tracking(
                        zipcode=zipcode,
                        is_new_entry=False,
                        timestamp=timestamp_start.strftime('%m/%d/%y %H:%M'),
                        filename=zipcode_filename)
            else:
                print("\t Writing to a new file: ", zipcode_filename)
                df.to_csv(zipcode_filename, index=False, float_format="%.5f")
                print("\t Updating tracking ...")
                update_tracking(
                    zipcode=zipcode,
                    is_new_entry=True,
                    timestamp=timestamp_start.strftime('%m/%d/%y %H:%M'),
                    filename=zipcode_filename)
            # NOTE(review): returns after the first non-municipal TDU —
            # remaining TDUs for this zipcode are never scraped; confirm
            # whether this early return is intentional.
            return True
    # If something goes wrong, False is returned
    return False