def test_extra_fields_pd(self):
    pdf = PanDatFactory(boger=[["a"], ["b", "c"]])
    dat = pdf.PanDat(boger=pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ['a', 'b', 'c']}))
    schema = "test_pd_extra_fields"
    pdf.pgsql.write_schema(self.engine, schema,
                           forced_field_types={("boger", "c"): "text", ("boger", "a"): "float"})
    pdf.pgsql.write_data(dat, self.engine, schema)
    pdf2 = PanDatFactory(boger=[["a"], ["b"]])
    dat2 = pdf2.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(list(dat2.boger["a"]) == [1.0, 2.0, 3.0] and
                    list(dat2.boger["b"]) == [4.0, 5.0, 6.0])
    dat2_2 = pdf2.PanDat(boger=pd.DataFrame({"a": [10, 300], "b": [40, 60]}))
    pdf2.pgsql.write_data(dat2_2, self.engine, schema)
    dat = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(list(dat.boger["a"]) == [10, 300] and list(dat.boger["b"]) == [40, 60])
    self.assertTrue(len(set(dat.boger["c"])) == 1)
def testNullsPd(self):
    pdf = PanDatFactory(table=[[], ["field one", "field two"]])
    for f in ["field one", "field two"]:
        pdf.set_data_type("table", f, nullable=True)
    dat = pdf.PanDat(table={"field one": [None, 200, 0, 300, 400],
                            "field two": [100, 109, 300, None, 0]})
    schema = test_schema + "_bool_defaults_pd"
    pdf.pgsql.write_schema(self.engine, schema, include_ancillary_info=False)
    pdf.pgsql.write_data(dat, self.engine, schema)
    dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(dat, dat_1, nans_are_same_for_data_rows=True))

    pdf = PanDatFactory(table=[["field one"], ["field two"]])
    for f in ["field one", "field two"]:
        pdf.set_data_type("table", f, max=float("inf"), inclusive_max=True)
    pdf.set_infinity_io_flag(None)
    dat_inf = pdf.PanDat(table={"field one": [float("inf"), 200, 0, 300, 400],
                                "field two": [100, 109, 300, float("inf"), 0]})
    dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(dat_inf, dat_1))
    pdf.pgsql.write_data(dat_inf, self.engine, schema)
    dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(dat_inf, dat_1))

    pdf = PanDatFactory(table=[["field one"], ["field two"]])
    for f in ["field one", "field two"]:
        pdf.set_data_type("table", f, min=-float("inf"), inclusive_min=True)
    pdf.set_infinity_io_flag(None)
    dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertFalse(pdf._same_data(dat_inf, dat_1))
    dat_inf = pdf.PanDat(table={"field one": [-float("inf"), 200, 0, 300, 400],
                                "field two": [100, 109, 300, -float("inf"), 0]})
    self.assertTrue(pdf._same_data(dat_inf, dat_1))
def test_missing_tables(self):
    schema = test_schema + "_missing_tables"
    tdf_1 = TicDatFactory(this=[["Something"], ["Another"]])
    pdf_1 = PanDatFactory(**tdf_1.schema())
    tdf_2 = TicDatFactory(**dict(tdf_1.schema(), that=[["What", "Ever"], []]))
    pdf_2 = PanDatFactory(**tdf_2.schema())
    dat = tdf_1.TicDat(this=[["a", 2], ["b", 3], ["c", 5]])
    pan_dat = tdf_1.copy_to_pandas(dat, drop_pk_columns=False)
    tdf_1.pgsql.write_schema(self.engine, schema)
    tdf_1.pgsql.write_data(dat, self.engine, schema)
    pg_dat = tdf_2.pgsql.create_tic_dat(self.engine, schema)
    self.assertTrue(tdf_1._same_data(dat, pg_dat))
    pg_pan_dat = pdf_2.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf_1._same_data(pan_dat, pg_pan_dat))
def testIssue45(self):
    schema = test_schema + "issue45"
    pdf = PanDatFactory(data=[["a"], ["b"]])
    pdf.set_data_type("data", "b", number_allowed=False, strings_allowed='*')
    tdf = TicDatFactory.create_from_full_schema(pdf.schema(include_ancillary_info=True))
    tic_dat = tdf.TicDat(data=[[2, "1"], [4, "3"], [44, "022"]])
    dat = tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)
    self.assertFalse(tdf.find_data_type_failures(tic_dat))
    self.assertFalse(pdf.find_data_type_failures(dat))
    pdf.pgsql.write_schema(self.engine, schema, forced_field_types={("data", "a"): "integer"})
    pdf.pgsql.write_data(dat, self.engine, schema)

    def two_checks():
        dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
        self.assertTrue(pdf._same_data(dat, dat_1))
        tic_dat_1 = tdf.pgsql.create_tic_dat(self.engine, schema)
        self.assertTrue(tdf._same_data(tic_dat, tic_dat_1))

    two_checks()
    tdf.pgsql.write_data(tic_dat, self.engine, schema)
    two_checks()
def test_nullables(self):
    schema = test_schema + "nullables"
    pdf = PanDatFactory(table_with_stuffs=[["field one"], ["field two"]])
    pdf.set_data_type("table_with_stuffs", "field one")
    pdf.set_data_type("table_with_stuffs", "field two", number_allowed=False,
                      strings_allowed='*', nullable=True)
    tdf = TicDatFactory.create_from_full_schema(pdf.schema(include_ancillary_info=True))
    tic_dat = tdf.TicDat(table_with_stuffs=[[101, "022"], [202, None], [303, "111"]])
    dat = tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)
    self.assertFalse(tdf.find_data_type_failures(tic_dat))
    self.assertFalse(pdf.find_data_type_failures(dat))
    pdf.pgsql.write_schema(self.engine, schema)
    pdf.pgsql.write_data(dat, self.engine, schema)
    dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(dat, dat_1, nans_are_same_for_data_rows=True))
    tic_dat_1 = tdf.pgsql.create_tic_dat(self.engine, schema)
    self.assertTrue(tdf._same_data(tic_dat, tic_dat_1, nans_are_same_for_data_rows=True))
def __init__(self):
    self.config_schema = PanDatFactory(
        action_settings=[[], ['Table', 'Column', 'Method', 'Value', 'Flag Column']])
    self.config_defaults = self.config_schema.PanDat(
        action_settings=[{'Table': 'data', 'Column': 'column1', 'Method': 'zScore',
                          'Value': '3', 'Flag Column': 'flag'}])
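# The defaults above configure only the 'zScore' method. Based on how execute_action parses
# the 'Value' field (a float threshold for 'zScore', a float multiplier for 'IQR', and a
# comma-separated "lower,upper" pair for 'range'), hypothetical rows for the other two
# supported methods would look like this (values are illustrative, not defaults from the source):
#
#     {'Table': 'data', 'Column': 'column1', 'Method': 'IQR', 'Value': '1.5', 'Flag Column': 'flag'}
#     {'Table': 'data', 'Column': 'column1', 'Method': 'range', 'Value': '10,90', 'Flag Column': 'flag'}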
def test_parameters_pd(self):
    schema = test_schema + "_parameters_pd"
    pdf = PanDatFactory(parameters=[["Key"], ["Value"]])
    pdf.add_parameter("Something", 100)
    pdf.add_parameter("Different", 'boo', strings_allowed='*', number_allowed=False)
    dat = TicDatFactory(**pdf.schema()).TicDat(
        parameters=[["Something", float("inf")], ["Different", "inf"]])
    dat = TicDatFactory(**pdf.schema()).copy_to_pandas(dat, drop_pk_columns=False)
    pdf.pgsql.write_schema(self.engine, schema)
    pdf.pgsql.write_data(dat, self.engine, schema)
    dat_ = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(dat, dat_))
def test_big_diet_pd(self):
    if not self.can_run:
        return
    tdf = diet_schema
    pdf = PanDatFactory(**tdf.schema())
    pgpf = PostgresPanFactory(pdf)
    big_dat = diet_schema.copy_tic_dat(diet_dat)
    for k in range(int(1e5)):
        big_dat.categories[str(k)] = [0, 100]
    pan_dat = pan_dat_maker(tdf.schema(), big_dat)
    schema = "test_pg_big_diet"
    now = time.time()
    pgpf.write_schema(self.engine, schema)
    pgpf.write_data(pan_dat, self.engine, schema)
    print(f"write seconds: {time.time() - now}")
    now = time.time()
    pg_pan_dat = pgpf.create_pan_dat(self.engine, schema)
    print(f"read seconds: {time.time() - now}")
    self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
def testDietWithInfFlaggingPd(self):
    pdf = PanDatFactory.create_from_full_schema(diet_schema.schema(include_ancillary_info=True))
    dat = diet_schema.copy_to_pandas(diet_dat, drop_pk_columns=False)
    pdf.set_infinity_io_flag(999999999)
    schema = test_schema + "_diet_inf_flagging_pd"
    pdf.pgsql.write_schema(self.engine, schema)
    pdf.pgsql.write_data(dat, self.engine, schema)
    dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(dat, dat_1))
    pdf = pdf.clone()
    dat_1 = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(dat, dat_1))
    # a factory without the infinity flag reads the sentinel number back verbatim
    pdf_no_flag = PanDatFactory(**diet_schema.schema())
    dat_1 = pdf_no_flag.pgsql.create_pan_dat(self.engine, schema)
    self.assertFalse(pdf_no_flag._same_data(dat, dat_1))
    protein = dat_1.categories["Name"] == "protein"
    self.assertTrue(list(dat_1.categories[protein]["Max Nutrition"])[0] == 999999999)
    dat_1.categories.loc[protein, "Max Nutrition"] = float("inf")
    self.assertTrue(pdf_no_flag._same_data(dat, dat_1))
# particular day. We use lexicographic optimization to solve the model: first, we minimize
# the linear sum of the slacks. Then, we constrain the sum of the slacks, and minimize the
# total payment to workers. Finally, we minimize a quadratic objective that tries to balance
# the workload among the workers.
#
from ticdat import PanDatFactory, standard_main
try:  # if you don't have amplpy installed, the code will still load and then fail on solve
    from amplpy import AMPL
except:
    AMPL = None

# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(
    workers=[["Name"], ["Payment"]],
    shifts=[["Name"], ["Requirement"]],
    availability=[["Worker", "Shift"], []])
# Define the foreign key relationships
input_schema.add_foreign_key("availability", "workers", ['Worker', 'Name'])
input_schema.add_foreign_key("availability", "shifts", ['Shift', 'Name'])
# Define the data types
input_schema.set_data_type("workers", "Payment", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=True)
input_schema.set_data_type("shifts", "Requirement", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=True)
# ---------------------------------------------------------------------------------

# ------------------------ define the output schema -------------------------------
solution_schema = PanDatFactory(
def pan_dat_maker(schema, tic_dat):
    tdf = TicDatFactory(**schema)
    pdf = PanDatFactory(**schema)
    return pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, tic_dat))
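# pan_dat_maker depends on a copy_to_pandas_with_reset helper that is not defined in this
# excerpt. A minimal sketch of what it is assumed to do, mirroring the
# copy_to_pandas(..., drop_pk_columns=False) calls used elsewhere in these tests:
def copy_to_pandas_with_reset(tdf, tic_dat):
    # keep primary key fields as ordinary columns, then reset each table's index
    pan_dat = tdf.copy_to_pandas(tic_dat, drop_pk_columns=False)
    for t in tdf.all_tables:
        getattr(pan_dat, t).reset_index(drop=True, inplace=True)
    return pan_dat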
# Simplest diet example using amplpy and ticdat
from amplpy import AMPL
from ticdat import PanDatFactory, standard_main

input_schema = PanDatFactory(
    categories=[["Name"], ["Min Nutrition", "Max Nutrition"]],
    foods=[["Name"], ["Cost"]],
    nutrition_quantities=[["Food", "Category"], ["Quantity"]])

# There are three solution tables, with 3 primary key fields and 3 data fields amongst them.
solution_schema = PanDatFactory(
    parameters=[["Parameter"], ["Value"]],
    buy_food=[["Food"], ["Quantity"]],
    consume_nutrition=[["Category"], ["Quantity"]])

def solve(dat):
    # build the AMPL math model
    ampl = AMPL()
    ampl.setOption('solver', 'gurobi')
    ampl.eval("""
    set CAT;
    set FOOD;

    param cost {FOOD} > 0, < Infinity;
    param n_min {CAT} >= 0, < Infinity;
    param n_max {i in CAT} >= n_min[i];
    param amt {FOOD, CAT} >= 0, < Infinity;
from ticdat import PanDatFactory

input_schema = PanDatFactory(
    plants=[["Name"], []],
    warehouses=[["Name"], ["Max Assignment Capacity", "Fixed Cost"]],
    customers=[["Name"], []],
    products=[["Name"], ["Warehouse Volume"]],
    demand=[["Customer", "Product"], ["Demand"]],
    supply=[["Plant", "Product"], ["Supply"]],
    plant_to_warehouse_costs=[["Plant", "Warehouse", "Product"], ["Cost"]],
    warehouse_to_customer_costs=[["Warehouse", "Customer", "Product"], ["Cost"]],
    warehouse_to_customer_distances=[["Warehouse", "Customer"], ["Distance"]],
    parameters=[["Parameter"], ["Value"]])
input_schema.add_parameter("Number of Warehouses", default_value=4, inclusive_min=False,
                           inclusive_max=False, min=0, max=float("inf"), must_be_int=True)
input_schema.add_parameter("High Service Distance", default_value=0, inclusive_min=True,
                           inclusive_max=True, min=0, max=float("inf"), must_be_int=False)
input_schema.add_parameter("Maximum Average Service Distance", default_value=float("inf"),
from ticdat import PanDatFactory

input_schema = PanDatFactory(
    cities=[["Name"], ["Demand"]],
    distances=[["Source", "Destination"], ["Distance"]],
    parameters=[["Parameter"], ["Value"]])
input_schema.add_parameter("Number of Centroids", default_value=4, inclusive_min=False,
                           inclusive_max=False, min=0, max=float("inf"), must_be_int=True)
input_schema.set_data_type("cities", "Demand", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=False)
input_schema.set_data_type("distances", "Distance", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=False)
input_schema.add_foreign_key("distances", "cities", ['Source', 'Name'])
input_schema.add_foreign_key("distances", "cities", ['Destination', 'Name'])

# The distance matrix is bi-directionally safe. I.e. if the same source/dest and dest/source
# rows both exist, then their distances must match. If only one is present, the code can fall
# back to the other.
def _distance_matrix(dat):
    return {"distance_matrix": {tuple(row[:2]): row[2]
                                for row in dat.distances.itertuples(index=False)}}

input_schema.add_data_row_predicate(
    "distances", predicate_name="Check Bi-Directionally Safe",
    predicate=lambda row, distance_matrix:
        ((row["Destination"], row["Source"]) not in distance_matrix) or
        (row["Distance"] == distance_matrix[row["Destination"], row["Source"]]),
    predicate_kwargs_maker=_distance_matrix)

solution_schema = PanDatFactory(
    openings=[['City'], []],
    assignments=[['City', 'Assigned To'], []],
    parameters=[["Parameter"], ["Value"]])

def solve(dat):
    assert input_schema.good_pan_dat_object(dat), "bad dat check"
    assert not input_schema.find_duplicates(dat), "duplicate row check"
    assert not input_schema.find_foreign_key_failures(dat), "foreign key check"
    assert not input_schema.find_data_type_failures(dat), "data type value check"
    assert not input_schema.find_data_row_failures(dat), "data row check"
# Provides command line interface via ticdat.standard_main.
# For example, typing
#   python metrorail.py -i metrorail_sample_data.json -o metrorail_solution_data.json
# will read from a model stored in the file metrorail_sample_data.json and write the
# solution to metrorail_solution_data.json.

# this version of the file uses amplpy and Gurobi
from amplpy import AMPL
from ticdat import PanDatFactory, standard_main
from itertools import product
from pandas import DataFrame

# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(
    parameters=[["Parameter"], ["Value"]],
    load_amounts=[["Amount"], []],
    number_of_one_way_trips=[["Number"], []],
    amount_leftover=[["Amount"], []])

input_schema.set_data_type("load_amounts", "Amount", min=0, max=float("inf"),
                           inclusive_min=False, inclusive_max=False)
input_schema.set_data_type("number_of_one_way_trips", "Number", min=0, max=float("inf"),
                           inclusive_min=False, inclusive_max=False, must_be_int=True)
input_schema.set_data_type("amount_leftover", "Amount", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=False)

default_parameters = {"One Way Price": 2.25, "Amount Leftover Constraint": "Upper Bound"}

def _good_parameter_key_value(key, value):
from ticdat import PanDatFactory

input_schema = PanDatFactory(
    cities=[["Name"], ["Demand"]],
    distances=[["Source", "Destination"], ["Distance"]],
    parameters=[["Parameter"], ["Value"]])
input_schema.add_parameter("Number of Centroids", default_value=4, inclusive_min=False,
                           inclusive_max=False, min=0, max=float("inf"), must_be_int=True)
input_schema.add_parameter("High Service Distance", default_value=0, inclusive_min=True,
                           inclusive_max=True, min=0, max=float("inf"), must_be_int=False)
input_schema.add_parameter("Maximum Average Service Distance", default_value=float("inf"),
                           inclusive_min=True, inclusive_max=True, min=0, max=float("inf"),
                           must_be_int=False)
input_schema.add_parameter("Minimum Percent High Service Demand", default_value=0,
                           inclusive_min=True, inclusive_max=True, min=0, max=100,
                           must_be_int=False)
input_schema.add_parameter("Maximum Individual Service Distance", default_value=float("inf"),
                           inclusive_min=False, inclusive_max=True, min=0, max=float("inf"),
                           must_be_int=False)
input_schema.add_parameter("Objective", "Minimize Average Service Distance",
                           strings_allowed=["Minimize Average Service Distance",
                                            "Maximize Percent High Service Demand"],
                           number_allowed=False)
input_schema.set_data_type("cities", "Demand", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=False)
input_schema.set_data_type("distances", "Distance", min=0, max=float("inf"),
                           inclusive_min=True, inclusive_max=False)
input_schema.add_foreign_key("distances", "cities", ['Source', 'Name'])
input_schema.add_foreign_key("distances", "cities", ['Destination', 'Name'])

# The distance matrix is bi-directionally safe. I.e. if the same source/dest and dest/source
# rows both exist, then their distances must match. If only one is present, the code can fall
# back to the other.
def _distance_matrix(dat):
    return {"distance_matrix": {tuple(row[:2]): row[2]
                                for row in dat.distances.itertuples(index=False)}}

input_schema.add_data_row_predicate(
    "distances", predicate_name="Check Bi-Directionally Safe",
    predicate=lambda row, distance_matrix:
        ((row["Destination"], row["Source"]) not in distance_matrix) or
        (row["Distance"] == distance_matrix[row["Destination"], row["Source"]]),
    predicate_kwargs_maker=_distance_matrix)
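# Downstream solve code typically materializes the parameters declared above with ticdat's
# create_full_parameters_dict, which merges the parameters table with the declared defaults.
# A minimal usage sketch (dat is assumed to be an input_schema.PanDat object):
#
#     full_parameters = input_schema.create_full_parameters_dict(dat)
#     number_of_centroids = full_parameters["Number of Centroids"]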
# python fantop.py -i fantop_sample_data -o fantop_solution_data
from ticdat import PanDatFactory, standard_main
try:  # if you don't have amplpy installed, the code will still load and then fail on solve
    from amplpy import AMPL
except:
    AMPL = None

# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(
    parameters=[["Parameter"], ["Value"]],
    players=[['Player Name'],
             ['Position', 'Average Draft Position', 'Expected Points', 'Draft Status']],
    roster_requirements=[['Position'],
                         ['Min Num Starters', 'Max Num Starters', 'Min Num Reserve',
                          'Max Num Reserve', 'Flex Status']],
    my_draft_positions=[['Draft Position'], []])

# add foreign key constraints (optional, but helps with preventing garbage-in, garbage-out)
input_schema.add_foreign_key("players", "roster_requirements", ['Position', 'Position'])

# set data types (optional, but helps with preventing garbage-in, garbage-out)
input_schema.set_data_type("parameters", "Parameter",
#
# Demonstrates reading/writing datetime (here, specifically pandas.Timestamp) data
# to and from .csv files.
#
# The command line interface works like this:
#   python simple_datetime_solver.py -i sample_data -o solution_directory
#
# This is a very simple app that demos datetime functionality that might be useful for a
# routing application. A parameter defines the start of the model, and each order has a
# "Deliver By" time requirement. The solution (which is just diagnostic information) is the
# time elapsed (in days) between the start time of the model and the "Deliver By" time for
# each order.
from ticdat import PanDatFactory, standard_main

# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(parameters=[["Name"], ["Value"]],
                             orders=[["Name"], ["Deliver By"]])
input_schema.set_data_type("orders", "Deliver By", datetime=True)
input_schema.add_parameter("Start Of Model", "Jan 1 2019 8 AM", datetime=True)
# ---------------------------------------------------------------------------------

# ------------------------ define the output schema -------------------------------
solution_schema = PanDatFactory(time_to_deliver=[["Name"], ["Maximum Time To Deliver"]])
# ---------------------------------------------------------------------------------

# ------------------------ create a solve function --------------------------------
def solve(dat):
    assert input_schema.good_pan_dat_object(dat)
    assert not input_schema.find_duplicates(dat)
def test_diet_amplpy(self):
    dat = _diet_input_pdf.copy_to_ampl(_diet_dat, field_renamings={
        ("foods", "Cost"): "cost",
        ("categories", "Min Nutrition"): "n_min", ("categories", "Max Nutrition"): "n_max",
        ("nutrition_quantities", "Quantity"): "amt",
        ("nutrition_quantities", "Other Quantity"): "other_amt"})
    self.assertTrue({"n_min", "n_max"}.issubset(dat.categories.toPandas().columns))
    ampl = amplpy.AMPL()
    ampl.setOption('solver', 'gurobi')
    ampl.eval(_diet_mod)
    _diet_input_pdf.set_ampl_data(dat, ampl, {"categories": "CAT", "foods": "FOOD"})
    ampl.solve()
    sln = _diet_sln_pdf.copy_from_ampl_variables({
        ("buy_food", "Quantity"): ampl.getVariable("Buy"),
        ("consume_nutrition", "Quantity"): ampl.getVariable("Consume")})
    sln.parameters.loc[0] = ['Total Cost', ampl.getObjective('Total_Cost').value()]

    _missing_field_pdf = PanDatFactory(**{
        t: [pks, (["Max Nutrition"] if t == "categories" else dfs)]
        for t, (pks, dfs) in _diet_input_pdf.schema().items()})
    dat = _missing_field_pdf.copy_to_ampl(_diet_dat, field_renamings={
        ("foods", "Cost"): "cost",
        ("categories", "Min Nutrition"): "n_min", ("categories", "Max Nutrition"): "n_max",
        ("nutrition_quantities", "Quantity"): "amt",
        ("nutrition_quantities", "Other Quantity"): "other_amt"})
    self.assertTrue({"n_min", "n_max"}.issubset(dat.categories.toPandas().columns))
    ampl = amplpy.AMPL()
    ampl.setOption('solver', 'gurobi')
    ampl.eval(_diet_mod)
    _diet_input_pdf.set_ampl_data(dat, ampl, {"categories": "CAT", "foods": "FOOD"})
    ampl.solve()
    sln_2 = _diet_sln_pdf.copy_from_ampl_variables({
        ("buy_food", "Quantity"): ampl.getVariable("Buy"),
        ("consume_nutrition", "Quantity"): ampl.getVariable("Consume")})
    sln_2.parameters.loc[0] = ['Total Cost', ampl.getObjective('Total_Cost').value()]
    self.assertTrue(_diet_sln_pdf._same_data(sln, sln_2))

    diet_dat_two = _diet_input_pdf.copy_to_tic_dat(_diet_dat)
    for r in diet_dat_two.nutrition_quantities.values():
        r["Quantity"], r["Other Quantity"] = [0.5 * r["Quantity"]] * 2
    diet_dat_two = pan_dat_maker(_diet_input_pdf.schema(), diet_dat_two)
    dat = _diet_input_pdf.copy_to_ampl(diet_dat_two, field_renamings={
        ("foods", "Cost"): "cost",
        ("categories", "Min Nutrition"): "n_min", ("categories", "Max Nutrition"): "n_max",
        ("nutrition_quantities", "Quantity"): "amt",
        ("nutrition_quantities", "Other Quantity"): "other_amt"})
    ampl = amplpy.AMPL()
    ampl.setOption('solver', 'gurobi')
    ampl.eval(_diet_mod)
    _diet_input_pdf.set_ampl_data(dat, ampl, {"categories": "CAT", "foods": "FOOD"})
    ampl.solve()
    self.assertTrue("solved" == ampl.getValue("solve_result"))
    sln = _diet_sln_pdf.copy_from_ampl_variables({
        ("buy_food", "Quantity"): ampl.getVariable("Buy"),
        ("consume_nutrition", "Quantity"): ampl.getVariable("Consume")})
    sln.parameters.loc[0] = ['Total Cost', ampl.getObjective('Total_Cost').value()]
    self.assertTrue(_diet_sln_pdf._same_data(sln, _diet_sln_pandat, epsilon=1e-5))

    dat = _diet_input_pdf.copy_to_ampl(_diet_dat, {
        ("foods", "Cost"): "cost", ("categories", "Min Nutrition"): "",
        ("categories", "Max Nutrition"): "n_max"}, ["nutrition_quantities"])
    self.assertFalse(hasattr(dat, "nutrition_quantities"))
    self.assertTrue({"n_min", "n_max"}.intersection(dat.categories.toPandas().columns) ==
                    {"n_max"})

    sln_tdf_2 = PanDatFactory(buy_food=[["Food"], ["Quantity"]],
                              consume_nutrition=[["Category"], []])
    sln_tdf_2.set_default_value("buy_food", "Quantity", 1)
    sln_2 = sln_tdf_2.copy_from_ampl_variables({
        ("buy_food", False): ampl.getVariable("Buy"),
        ("consume_nutrition", False): (ampl.getVariable("Consume"), lambda x: x < 100)})
    self.assertTrue(set(sln_2.buy_food["Quantity"]) == {1})
    self.assertTrue(set(sln_2.buy_food["Food"]) == set(sln.buy_food["Food"]))
    self.assertTrue(len(sln_2.consume_nutrition) > 0)
    self.assertTrue(set(sln_2.consume_nutrition["Category"]) ==
                    set(sln.consume_nutrition[sln.consume_nutrition["Quantity"] < 100]
                        ["Category"]))

    diet_dat_two = _diet_input_pdf.copy_to_tic_dat(_diet_dat)
    diet_dat_two.categories["calories"] = [0, 200]
    diet_dat_two = pan_dat_maker(_diet_input_pdf.schema(), diet_dat_two)
    dat = _diet_input_pdf.copy_to_ampl(diet_dat_two, field_renamings={
        ("foods", "Cost"): "cost",
        ("categories", "Min Nutrition"): "n_min", ("categories", "Max Nutrition"): "n_max",
        ("nutrition_quantities", "Quantity"): "amt",
        ("nutrition_quantities", "Other Quantity"): "other_amt"})
    ampl = amplpy.AMPL()
    ampl.setOption('solver', 'gurobi')
    ampl.eval(_diet_mod)
    _diet_input_pdf.set_ampl_data(dat, ampl, {"categories": "CAT", "foods": "FOOD"})
    ampl.solve()
    self.assertTrue("infeasible" == ampl.getValue("solve_result"))

    diet_dat_two = _diet_input_pdf.copy_to_tic_dat(_diet_dat)
    for v in diet_dat_two.categories.values():
        v["Max Nutrition"] = float("inf")
    diet_dat_two.foods["hamburger"] = -1
    diet_dat_two = pan_dat_maker(_diet_input_pdf.schema(), diet_dat_two)
    dat = _diet_input_pdf.copy_to_ampl(diet_dat_two, field_renamings={
        ("foods", "Cost"): "cost",
        ("categories", "Min Nutrition"): "n_min", ("categories", "Max Nutrition"): "n_max",
        ("nutrition_quantities", "Quantity"): "amt",
        ("nutrition_quantities", "Other Quantity"): "other_amt"})
    ampl = amplpy.AMPL()
    ampl.setOption('solver', 'gurobi')
    ampl.eval(_diet_mod)
    _diet_input_pdf.set_ampl_data(dat, ampl, {"categories": "CAT", "foods": "FOOD"})
    ampl.solve()
    self.assertTrue("unbounded" == ampl.getValue("solve_result"))
#
# Perform KMeans-clustering on the Iris data set. Number of clusters can be controlled via an
# optional parameters table.
#
# Command line interface works like this
#   python iris.py -i sample_data -o solution_directory
#
from ticdat import PanDatFactory, standard_main
from sklearn.preprocessing import scale
from sklearn.cluster import KMeans

# ------------------------ define the input schema --------------------------------
_core_numeric_fields = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
input_schema = PanDatFactory(parameters=[['Name'], ['Value']],
                             iris=[[], _core_numeric_fields + ['Species']])
# the core data fields should be positive, non-infinite numbers
for fld in _core_numeric_fields:
    input_schema.set_data_type("iris", fld, inclusive_min=False, inclusive_max=False,
                               min=0, max=float("inf"))
input_schema.set_data_type("iris", 'Species', number_allowed=False, strings_allowed='*')
# the number of clusters is our only parameter, but using a parameters table makes it easy to
# add more as needed
def execute_action(self):
    """
    Performs outlier/anomaly detection on fields/columns based on zScore,
    inter-quartile range, or bounds.
    """
    config_dfs = self.read_data('config_schema')
    # parse the action config table into a dataframe
    action_config_df = config_dfs.action_settings
    table_col_dict = {}
    table_flag_col_dict = {}
    for _, series in action_config_df.iterrows():
        if series['Table'] in table_col_dict:
            table_col_dict[series['Table']].add(str(series['Column']))
        else:
            table_col_dict[series['Table']] = {series['Column']}
        table_flag_col_dict[series['Table']] = {series['Flag Column']}

    self.data_schema = PanDatFactory(**{
        table: [[], list(table_col_dict[table]) + list(table_flag_col_dict[table])]
        for table in table_col_dict})
    table_dfs = self.read_data('data_schema')

    # process each configured action row
    for i in range(action_config_df.shape[0]):
        table_name = action_config_df['Table'].iloc[i]
        col_to_analyze = action_config_df['Column'].iloc[i]
        flag_column = action_config_df['Flag Column'].iloc[i]
        method = action_config_df['Method'].iloc[i]
        if method == 'zScore':
            z_threshold = float(action_config_df['Value'].iloc[i])
            table_df = getattr(table_dfs, table_name)
            numbers = np.array([float(item) for item in table_df[col_to_analyze]])
            std_dev = np.std(numbers)
            mean = np.mean(numbers)
            # flag rows whose z-score magnitude exceeds the threshold
            table_df[flag_column] = table_df.apply(
                lambda row: bool(row[flag_column]) or
                abs((float(row[col_to_analyze]) - mean) / std_dev) > z_threshold,
                axis=1)
        elif method == 'IQR':
            iqr_multiplier = float(action_config_df['Value'].iloc[i])
            table_df = getattr(table_dfs, table_name)
            numbers = np.array([float(item) for item in table_df[col_to_analyze]])
            q75, q25 = np.percentile(numbers, [75, 25])
            iqr = q75 - q25
            # flag rows outside [q25 - multiplier * iqr, q75 + multiplier * iqr]
            table_df[flag_column] = table_df.apply(
                lambda row: bool(row[flag_column]) or
                (float(row[col_to_analyze]) > q75 + iqr_multiplier * iqr) or
                (float(row[col_to_analyze]) < q25 - iqr_multiplier * iqr),
                axis=1)
        elif method == 'range':
            lb, ub = action_config_df['Value'].iloc[i].split(',')
            table_df = getattr(table_dfs, table_name)
            # flag rows outside the [lb, ub] bounds
            table_df[flag_column] = table_df.apply(
                lambda row: bool(row[flag_column]) or
                (float(row[col_to_analyze]) > float(ub)) or
                (float(row[col_to_analyze]) < float(lb)),
                axis=1)
        else:
            print("Error: Please enter a valid value in Method to use (zScore, IQR, range).")

    # write the flagged data back
    self.write_data(table_dfs)
    exit()
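# As a design note, the per-row .apply calls in execute_action could be replaced with
# vectorized pandas operations. A minimal sketch for the zScore branch (the function name and
# signature are illustrative, not from the source; ddof=0 matches the np.std call above):
import pandas as pd

def flag_zscore(df: pd.DataFrame, col: str, flag: str, z_threshold: float) -> None:
    values = df[col].astype(float)
    z = (values - values.mean()) / values.std(ddof=0)
    # OR the new outlier flags into any existing flags, as execute_action does
    df[flag] = df[flag].astype(bool) | (z.abs() > z_threshold)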
def test_diet_pd(self):
    if not self.can_run:
        return
    schema = "test_pg_diet"
    tdf = diet_schema
    pdf = PanDatFactory.create_from_full_schema(tdf.schema(include_ancillary_info=True))
    pdf.set_infinity_io_flag(1e12)
    pgpf = pdf.pgsql
    pan_dat = pan_dat_maker(tdf.schema(), diet_dat)
    pgpf.write_schema(self.engine, schema, include_ancillary_info=False)
    pgpf.write_data(pan_dat, self.engine, schema)
    pg_pan_dat = pgpf.create_pan_dat(self.engine, schema)
    self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
    pdf.set_infinity_io_flag(None)
    pg_pan_dat_none_inf = pdf.pgsql.create_pan_dat(self.engine, schema)
    self.assertFalse(pdf._same_data(pan_dat, pg_pan_dat_none_inf))
    pg_pan_dat_none_inf.categories.loc[pg_pan_dat_none_inf.categories["Name"] == "protein",
                                       "Max Nutrition"] = float("inf")
    self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat_none_inf))
    pdf.set_infinity_io_flag("N/A")

    dat2 = diet_schema.copy_tic_dat(diet_dat)
    dat2.foods["za"] = dat2.foods.pop("pizza")
    dat2 = pan_dat_maker(tdf.schema(), dat2)
    pgpf.write_data(dat2, self.engine, schema, pre_existing_rows={"foods": "append"})
    dat3 = pgpf.create_pan_dat(self.engine, schema)
    self.assertTrue(set(pdf.find_duplicates(dat3)) == {'foods'})
    self.assertTrue(set(dat3.foods["Name"]).issuperset(dat2.foods["Name"]))
    self.assertTrue(set(dat3.foods["Name"]).issuperset(pan_dat.foods["Name"]))
    self.assertTrue(set(dat3.foods["Name"]).difference(pan_dat.foods["Name"]) == {'za'})
    self.assertTrue(set(dat3.foods["Name"]).difference(dat2.foods["Name"]) == {'pizza'})
    pgpf.write_data(dat2, self.engine, schema,
                    pre_existing_rows={"nutrition_quantities": "append"})
    dat4 = pgpf.create_pan_dat(self.engine, schema)
    self.assertTrue(set(pdf.find_duplicates(dat4)) == {'nutrition_quantities'} and
                    not pdf.find_duplicates(dat2))
    dat4.nutrition_quantities = dat4.nutrition_quantities[:36]
    self.assertFalse(pdf.find_duplicates(dat4))
    self.assertTrue(pdf._same_data(dat2, dat4))

    test_schema_2 = schema + "_none_inf"
    pdf.set_infinity_io_flag(None)
    pgpf.write_schema(self.engine, test_schema_2)
    pgpf.write_data(pan_dat, self.engine, test_schema_2)
    pdf.set_infinity_io_flag("N/A")
    pg_pan_dat = pgpf.create_pan_dat(self.engine, test_schema_2)
    self.assertFalse(pdf._same_data(pan_dat, pg_pan_dat))
    pg_pan_dat.categories.loc[pg_pan_dat.categories["Name"] == "protein",
                              "Max Nutrition"] = float("inf")
    self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat))
    pdf.set_infinity_io_flag(None)
    pg_pan_dat_none_inf = pgpf.create_pan_dat(self.engine, test_schema_2)
    self.assertTrue(pdf._same_data(pan_dat, pg_pan_dat_none_inf))

    pdf_ = PanDatFactory(**diet_schema.schema())  # doesn't have data types
    pdf_.set_infinity_io_flag(None)
    pgpf_null_inf = pdf_.pgsql
    pg_pan_dat_none_inf = pgpf_null_inf.create_pan_dat(self.engine, test_schema_2)
    self.assertFalse(pdf._same_data(pan_dat, pg_pan_dat_none_inf))
    self.assertTrue(math.isnan(
        pg_pan_dat_none_inf.categories[pg_pan_dat_none_inf.categories["Name"] == "protein"]
        ["Max Nutrition"][0]))
#
# Core engine file for tts_netflow_b
#
try:
    import gurobipy as gp
except:
    gp = None
from ticdat import PanDatFactory, Slicer

# ------------------------ define the input schema --------------------------------
input_schema = PanDatFactory(
    commodities=[["Name"], ["Volume"]],
    nodes=[["Name"], []],
    arcs=[["Source", "Destination"], ["Capacity"]],
    cost=[["Commodity", "Source", "Destination"], ["Cost"]],
    supply=[["Commodity", "Node"], ["Quantity"]],
    demand=[["Commodity", "Node"], ["Quantity"]])

# Define the foreign key relationships
input_schema.add_foreign_key("arcs", "nodes", ['Source', 'Name'])
input_schema.add_foreign_key("arcs", "nodes", ['Destination', 'Name'])
input_schema.add_foreign_key("cost", "arcs", [['Source', 'Source'],
                                              ['Destination', 'Destination']])
input_schema.add_foreign_key("cost", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("demand", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("demand", "nodes", ['Node', 'Name'])
input_schema.add_foreign_key("supply", "commodities", ['Commodity', 'Name'])
input_schema.add_foreign_key("supply", "nodes", ['Node', 'Name'])

# Define the data types
input_schema.set_data_type("commodities",
param n_max {i in CAT} >= n_min[i];
param amt {FOOD, CAT} >= 0;
param other_amt {FOOD, CAT} >= 0;

var Buy {j in FOOD} >= 0;
var Consume {i in CAT} >= n_min[i], <= n_max[i];

minimize Total_Cost: sum {j in FOOD} cost[j] * Buy[j];

subject to Diet {i in CAT}:
    Consume[i] = sum {j in FOOD} (amt[j,i] + other_amt[j,i]) * Buy[j];
"""

_diet_input_pdf = PanDatFactory(
    categories=[["Name"], ["Min Nutrition", "Max Nutrition"]],
    foods=[["Name"], ["Cost"]],
    nutrition_quantities=[["Food", "Category"], ["Quantity", "Other Quantity"]])

_diet_pan_dat_from_dict = lambda pd_dict: _pan_dat_maker_from_dict(
    _diet_input_pdf.schema(), pd_dict)

_diet_dat = _pan_dat_maker_from_dict(_diet_input_pdf.schema(), {
    'categories': {
        u'calories': {'Max Nutrition': 2200.0, 'Min Nutrition': 1800},
        u'fat': {'Max Nutrition': 65.0, 'Min Nutrition': 0},