def pair_partition_to_vec(input_data: Tuple[Dict, Tuple[str, str], Iterable[Union[PosNegExample, UnlabeledExample, np.ndarray]]]):
    processed_specs, fields, partition_data = input_data

    columns = get_nested_index(fields)
    dfs = []

    for example in partition_data:
        Encoding.encoding_cnt = 0

        # hack to get named tuples to work in parallel
        if isinstance(example, np.ndarray):
            example = PosNegExample(*example)

        # use numeric indexes because we don't know the field names here
        neg_feature_vec = count_violations_memoized(
            processed_specs,
            Task(example.data, Query.from_vegalite(example[4]), example.task))
        pos_feature_vec = count_violations_memoized(
            processed_specs,
            Task(example.data, Query.from_vegalite(example[5]), example.task))

        # Reformat the JSON data so that we can insert it into a multi-index data frame.
        # https://stackoverflow.com/questions/24988131/nested-dictionary-to-multiindex-dataframe-where-dictionary-keys-are-column-label
        specs = {(fields[0], key): values for key, values in neg_feature_vec.items()}
        specs.update({(fields[1], key): values for key, values in pos_feature_vec.items()})

        specs[('source', '')] = example.source
        specs[('task', '')] = example.task

        dfs.append(pd.DataFrame(specs, columns=columns, index=[example.pair_id]))

    return pd.concat(dfs)
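# Minimal standalone sketch (assumption: the values below are illustrative,
# not actual draco output) of the tuple-keyed-dict trick used above: pandas
# turns tuple keys into a two-level column MultiIndex.
def _multiindex_sketch():
    specs = {
        ('negative', 'encoding'): 2,
        ('positive', 'encoding'): 1,
        ('source', ''): 'manual',
        ('task', ''): 'summary',
    }
    # scalar values are broadcast across the single-row index
    return pd.DataFrame(specs, index=[0])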
def main():  # pragma: no cover
    parser = create_parser()
    args = parser.parse_args()

    if args.mode != Mode.optimize and (args.type == QueryType.draco or args.type == QueryType.cql):
        print('Validation only works with full specs.', file=sys.stderr)
    else:
        logger.info(f'Processing query: {args.query.name} ...')

        if args.type == QueryType.asp:
            input_task = AspTask(args.query.read())
        else:
            # load a task from a spec provided by the user
            query_spec = json.load(args.query)
            d = args.base or os.path.dirname(args.query.name)
            if args.type == QueryType.draco:
                input_task = Task.from_obj(query_spec, d)
            elif args.type == QueryType.cql:
                input_task = Task.from_cql(query_spec, d)
            elif args.type == QueryType.vl:
                input_task = Task.from_vegalite(query_spec, d)

        if args.mode == Mode.violations:
            task = run(input_task, debug=args.debug,
                       files=['define.lp', 'hard.lp', 'soft.lp', 'output.lp'],
                       silence_warnings=True)
            if task:
                print(task.violations, file=args.out)
        elif args.mode == Mode.valid:
            task = run(input_task, debug=args.debug,
                       files=['define.lp', 'hard.lp', 'output.lp'],
                       silence_warnings=True)
            print('valid' if task else 'invalid', file=args.out)
        elif args.mode == Mode.optimize:
            task = run(input_task, debug=args.debug)
            if task:
                print(task.to_vegalite_json(), file=args.out)
                logger.info(f'Cost: {task.cost}')
                outname = 'stringIO' if isinstance(args.out, io.StringIO) else args.out.name
                logger.info(f'Wrote Vega-Lite spec to {outname}')

    # close open files
    if args.query is not sys.stdin:
        args.query.close()
    if args.out is not sys.stdout:
        args.out.close()
def run(task: Task, constants: Dict[str, str] = None, files: List[str] = None,
        silence_warnings=False, debug=False, clear_cache=False) -> Optional[Task]:
    ''' Run clingo to compute a completion of a partial spec or violations. '''

    # Clear the file cache. Useful during development in notebooks.
    if clear_cache and file_cache:
        logger.warning('Cleared file cache')
        file_cache.clear()

    stderr, stdout = run_draco(task, constants, files, silence_warnings, debug)

    try:
        json_result = json.loads(stdout)
    except json.JSONDecodeError:
        logger.error('stdout: %s', stdout)
        logger.error('stderr: %s', stderr)
        raise

    if stderr:
        logger.error(stderr)

    result = json_result['Result']

    if result == 'UNSATISFIABLE':
        logger.info('Constraints are unsatisfiable.')
        return None
    elif result == 'OPTIMUM FOUND':
        # get the last witness, which is the best result
        answers = json_result['Call'][0]['Witnesses'][-1]
        logger.debug(answers['Value'])
        return Task.parse_from_answer(
            clyngor.Answers(answers['Value']).sorted,
            data=task.data,
            cost=json_result['Models']['Costs'][0])
    elif result == 'SATISFIABLE':
        answers = json_result['Call'][0]['Witnesses'][-1]
        assert json_result['Models']['Number'] == 1, \
            'Should not have more than one model if we don\'t optimize'
        logger.debug(answers['Value'])
        return Task.parse_from_answer(
            clyngor.Answers(answers['Value']).sorted, data=task.data)
    else:
        logger.error('Unsupported result: %s', result)
        return None
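# Usage sketch (assumption: the partial spec below is hypothetical; the data
# file matches the one used in the tests in this repo). Completes a partial
# spec, mirroring Mode.optimize in main() above.
def _run_sketch():
    data = Data.from_csv('examples/data/cars.csv')
    query = Query.from_vegalite({
        'mark': 'point',
        'encoding': {
            'x': {'field': 'horsepower', 'type': 'quantitative'}
        }
    })
    completed = run(Task(data, query))  # None if the constraints are unsatisfiable
    if completed is not None:
        print(completed.to_vegalite_json())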
def test_stack_agg(self):
    query = Query.from_vegalite({
        'mark': 'bar',
        'encoding': {
            'x': {
                'type': 'nominal',
                'field': 'n1',
            },
            'y': {
                'type': 'quantitative',
                'field': 'q1',
                'stack': 'zero',
                'aggregate': 'sum'
            },
            'detail': {
                'type': 'nominal',
                'field': 'n2'
            },
            'color': {
                'type': 'quantitative',
                'field': 'q2',
                'aggregate': 'mean'
            }
        }
    })

    assert is_valid(Task(data, query), True) == True
def count_violations_memoized(processed_specs: Dict[str, Dict], task: Task):
    key = task.to_asp()
    if key not in processed_specs:
        violations = count_violations(task)
        if violations is not None:
            processed_specs[key] = violations
    return processed_specs[key]
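# Sketch of the memoization contract above (assumption: `task` is any Task
# instance; the ASP encoding from task.to_asp() serves as the cache key, so
# tasks with identical ASP programs share one computed entry).
def _memoization_sketch(task: Task):
    cache: Dict[str, Dict] = {}
    first = count_violations_memoized(cache, task)   # computes and stores
    second = count_violations_memoized(cache, task)  # served from the cache
    assert first is second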
def test_row_only(self):
    query = Query.from_vegalite({
        'mark': 'point',
        'encoding': {
            'row': {
                'type': 'nominal',
                'field': 'n1'
            }
        }
    })

    assert is_valid(Task(data, query), True) == False
def test_only_one_agg(self):
    query = Query.from_vegalite({
        'mark': 'point',
        'encoding': {
            'x': {
                'type': 'quantitative',
                'field': 'q1'
            },
            'y': {
                'type': 'quantitative',
                'field': 'q2',
                'aggregate': 'mean'
            }
        }
    })

    assert is_valid(Task(data, query), True) == False
def test_heatmap(self):
    query = Query.from_vegalite({
        'mark': 'rect',
        'encoding': {
            'x': {
                'type': 'nominal',
                'field': 'n1',
            },
            'y': {
                'type': 'ordinal',
                'field': 'q1',
                'bin': True
            }
        }
    })

    assert is_valid(Task(data, query), True) == True
def test_hist(self):
    query = Query.from_vegalite({
        'mark': 'bar',
        'encoding': {
            'x': {
                'type': 'quantitative',
                'field': 'q1',
                'bin': True
            },
            'y': {
                'type': 'quantitative',
                'aggregate': 'count'
            }
        }
    })

    assert is_valid(Task(data, query), True) == True
def generate_visual_pairs(partial_full_data, weights):
    # Generate pairs that can be visualized by bug finders
    result = {}
    result["headers"] = {
        "first": {
            "title": "Draco",
            "subtitle": "Draco Prediction"
        },
        "second": {
            "title": "CQL",
            "subtitle": "Compassql Prediction"
        }
    }
    result["specs"] = []

    for case in partial_full_data:
        partial_spec, full_spec = partial_full_data[case]

        draco_rec = run(Task.from_cql(partial_spec), constants=weights)

        if draco_rec is None:
            logger.warning(f'Could not find a spec for {partial_spec}')
            result["specs"].append({
                "first": None,
                "second": full_spec,
                "properties": {
                    "input": partial_spec
                }
            })
            continue

        result["specs"].append({
            "first": draco_rec.to_vegalite(),
            "second": full_spec,
            "properties": {
                "input": partial_spec
            }
        })

    return result
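# Shape of the structure returned above (values are illustrative placeholders):
#
#   {
#     "headers": {"first": {...}, "second": {...}},
#     "specs": [
#       {"first": <Draco Vega-Lite spec, or None if unsatisfiable>,
#        "second": <CompassQL Vega-Lite spec>,
#        "properties": {"input": <partial CQL spec>}},
#       ...
#     ]
#   }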
def test_count_violations():
    data = Data.from_csv('examples/data/cars.csv')

    query_json = {
        'mark': 'bar',
        'data': {
            'url': 'data/cars.csv'
        },
        'encoding': {
            'x': {
                'field': 'origin',
                'type': 'ordinal'
            },
            'y': {
                'field': 'horsepower',
                'type': 'quantitative',
                'aggregate': 'mean'
            }
        }
    }

    violations = count_violations(Task(data, Query.from_vegalite(query_json)))

    assert 'encoding' in violations.keys()
    assert violations.get('encoding') == 2
def run_draco(task: Task, constants: Dict[str, str] = None, files: List[str] = None,
              silence_warnings=False, debug=False) -> Tuple[str, str]:
    ''' Run draco and return stderr and stdout. '''

    # default args
    files = files or DRACO_LP
    constants = constants or {}

    options = ['--outf=2', '--quiet=1,2,2']

    if silence_warnings:
        options.append('--warn=no-atom-undefined')

    for name, value in constants.items():
        options.append(f'-c {name}={value}')

    cmd = ['clingo'] + options
    logger.debug('Command: %s', ' '.join(cmd))

    proc = subprocess.Popen(
        args=cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    task_program = task.to_asp()
    file_names = [os.path.join(DRACO_LP_DIR, f) for f in files]
    asp_program = b'\n'.join(map(load_file, file_names)) + task_program.encode('utf8')

    if debug:
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as fd:
            fd.write(task_program)
            logger.info('Debug ASP with "clingo %s %s"', ' '.join(file_names), fd.name)

    stdout, stderr = proc.communicate(asp_program)

    return (stderr, stdout)
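# Lower-level usage sketch (assumption: illustrative only; `task` is any Task,
# e.g. built as in the sketch after run() above). Unlike run(), run_draco()
# returns raw clingo output: stdout is a JSON document because of --outf=2.
def _run_draco_sketch(task: Task) -> str:
    stderr, stdout = run_draco(task, silence_warnings=True)
    json_result = json.loads(stdout)
    return json_result['Result']  # e.g. 'OPTIMUM FOUND' or 'UNSATISFIABLE'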
def test_stack_q_q(self):
    query = Query.from_vegalite({
        'mark': 'area',
        'encoding': {
            'x': {
                'type': 'quantitative',
                'field': 'q1',
                'scale': {
                    'zero': False
                }
            },
            'y': {
                'type': 'quantitative',
                'field': 'q2',
                'stack': 'zero'
            },
            'color': {
                'type': 'nominal',
                'field': 'n1'
            }
        }
    })

    assert is_valid(Task(data, query), True) == True