Exemple #1
0
def pair_partition_to_vec(input_data: Tuple[Dict, Tuple[str,str], Iterable[Union[PosNegExample, UnlabeledExample, np.ndarray]]]):
    '''
    Turn one partition of example pairs into a multi-index data frame of
    violation counts.

    `input_data` is a single tuple (so the function can be mapped over
    partitions) holding, in order:
      * the memoization dict handed to `count_violations_memoized`,
      * the two top-level column labels (first for the spec at position 4,
        second for the spec at position 5 of each example),
      * the examples of this partition.

    Returns the concatenation of one single-row data frame per example,
    indexed by the example's `pair_id`.
    '''
    processed_specs, fields, partition = input_data

    columns = get_nested_index(fields)
    frames = []

    for item in partition:
        Encoding.encoding_cnt = 0

        # Named tuples may arrive as plain arrays when processed in parallel;
        # rebuild the named tuple so attribute access works below.
        if isinstance(item, np.ndarray):
            item = PosNegExample(*item)

        # Positional access (4 = negative spec, 5 = positive spec) because
        # the field names are not known here.
        neg_feature_vec = count_violations_memoized(
            processed_specs,
            Task(item.data, Query.from_vegalite(item[4]), item.task))
        pos_feature_vec = count_violations_memoized(
            processed_specs,
            Task(item.data, Query.from_vegalite(item[5]), item.task))

        # Key every value by a (top level label, feature name) tuple so the
        # dict can be loaded into a multi-index data frame.
        # https://stackoverflow.com/questions/24988131/nested-dictionary-to-multiindex-dataframe-where-dictionary-keys-are-column-label
        row = {}
        for feature, count in neg_feature_vec.items():
            row[(fields[0], feature)] = count
        for feature, count in pos_feature_vec.items():
            row[(fields[1], feature)] = count

        row[('source', '')] = item.source
        row[('task', '')] = item.task

        frames.append(pd.DataFrame(row, columns=columns, index=[item.pair_id]))

    return pd.concat(frames)
Exemple #2
0
def main():  # pragma: no cover
    '''
    Command-line entry point.

    Parses the arguments, builds a task from the query (raw ASP, or a JSON
    spec in draco / cql / vega-lite form), runs it in the requested mode
    (violations, valid or optimize) and prints the outcome to `args.out`.
    The query and output files are closed afterwards, even if processing
    raises, unless they are the process's stdin / stdout.
    '''
    parser = create_parser()
    args = parser.parse_args()

    try:
        if args.mode != Mode.optimize and (args.type == QueryType.draco
                                           or args.type == QueryType.cql):
            # The stream must be passed as `file=`; given positionally it is
            # just a second value printed to stdout.
            print('Validation only works with full specs.', file=sys.stderr)
        else:
            logger.info(f'Processing query: {args.query.name} ...')

            if args.type == QueryType.asp:
                input_task = AspTask(args.query.read())
            else:
                # load a task from a spec provided by the user
                query_spec = json.load(args.query)
                # Base directory: explicit --base, else the query's directory.
                d = args.base or os.path.dirname(args.query.name)
                if args.type == QueryType.draco:
                    input_task = Task.from_obj(query_spec, d)
                elif args.type == QueryType.cql:
                    input_task = Task.from_cql(query_spec, d)
                elif args.type == QueryType.vl:
                    input_task = Task.from_vegalite(query_spec, d)

            if args.mode == Mode.violations:
                task = run(input_task,
                           debug=args.debug,
                           files=['define.lp', 'hard.lp', 'soft.lp', 'output.lp'],
                           silence_warnings=True)

                if task:
                    print(task.violations, file=args.out)
            elif args.mode == Mode.valid:
                # Only the hard constraints are loaded for validation.
                task = run(input_task,
                           debug=args.debug,
                           files=['define.lp', 'hard.lp', 'output.lp'],
                           silence_warnings=True)

                print('valid' if task else 'invalid', file=args.out)
            elif args.mode == Mode.optimize:
                task = run(input_task, debug=args.debug)

                if task:
                    print(task.to_vegalite_json(), file=args.out)
                    logger.info(f'Cost: {task.cost}')
                    # StringIO objects have no `name` attribute.
                    outname = 'stringIO' if isinstance(
                        args.out, io.StringIO) else args.out.name
                    logger.info(f'Wrote Vega-Lite spec to {outname}')
    finally:
        # close open files
        if args.query is not sys.stdin:
            args.query.close()

        if args.out is not sys.stdout:
            args.out.close()
Exemple #3
0
def run(task: Task, constants: Dict[str, str] = None, files: List[str] = None, silence_warnings=False, debug=False, clear_cache=False) -> Optional[Task]:
    '''
    Run clingo to compute a completion of a partial spec or violations.

    The JSON printed by clingo is parsed and its 'Result' field decides the
    outcome:
      * 'UNSATISFIABLE'  -> None
      * 'OPTIMUM FOUND'  -> task parsed from the last witness, with its cost
      * 'SATISFIABLE'    -> task parsed from the last witness (exactly one
                            model is expected)
      * anything else    -> logged as an error, None

    A json.JSONDecodeError is re-raised after logging stdout and stderr.
    '''

    # Clear file cache. useful during development in notebooks.
    if clear_cache and file_cache:
        logger.warning('Cleared file cache')
        file_cache.clear()

    stderr, stdout = run_draco(task, constants, files, silence_warnings, debug)

    try:
        output = json.loads(stdout)
    except json.JSONDecodeError:
        logger.error('stdout: %s', stdout)
        logger.error('stderr: %s', stderr)
        raise

    if stderr:
        logger.error(stderr)

    status = output['Result']

    if status == 'UNSATISFIABLE':
        logger.info('Constraints are unsatisfiable.')
        return None

    if status != 'OPTIMUM FOUND' and status != 'SATISFIABLE':
        logger.error('Unsupported result: %s', status)
        return None

    # The last witness is the one we want (the best one when optimizing).
    witness = output['Call'][0]['Witnesses'][-1]

    if status == 'SATISFIABLE':
        assert output['Models']['Number'] == 1, 'Should not have more than one model if we don\'t optimize'

    logger.debug(witness['Value'])

    if status == 'OPTIMUM FOUND':
        return Task.parse_from_answer(
            clyngor.Answers(witness['Value']).sorted,
            data=task.data,
            cost=output['Models']['Costs'][0])

    return Task.parse_from_answer(
        clyngor.Answers(witness['Value']).sorted,
        data=task.data)
Exemple #4
0
    def test_stack_agg(self):
        '''A stacked, summed bar with a detail field and a mean-aggregated color must be valid.'''
        x_channel = {'type': 'nominal', 'field': 'n1'}
        y_channel = {
            'type': 'quantitative',
            'field': 'q1',
            'stack': 'zero',
            'aggregate': 'sum',
        }
        detail_channel = {'type': 'nominal', 'field': 'n2'}
        color_channel = {
            'type': 'quantitative',
            'field': 'q2',
            'aggregate': 'mean',
        }

        spec = {
            'mark': 'bar',
            'encoding': {
                'x': x_channel,
                'y': y_channel,
                'detail': detail_channel,
                'color': color_channel,
            },
        }
        query = Query.from_vegalite(spec)

        assert is_valid(Task(data, query), True) == True
Exemple #5
0
def count_violations_memoized(processed_specs: Dict[str, Dict], task: Task):
    '''
    Return the violations of `task`, computing them at most once per program.

    `processed_specs` is the cache, keyed by the task's ASP program text
    (`task.to_asp()`); it is updated in place on a cache miss. If
    `count_violations` returns None nothing is cached and the final lookup
    raises KeyError.
    '''
    cache_key = task.to_asp()

    # Cache hit: nothing to compute.
    if cache_key in processed_specs:
        return processed_specs[cache_key]

    computed = count_violations(task)
    if computed is not None:
        processed_specs[cache_key] = computed

    return processed_specs[cache_key]
Exemple #6
0
    def test_row_only(self):
        '''A point chart whose only encoding is a nominal row must be invalid.'''
        row_channel = {'type': 'nominal', 'field': 'n1'}
        spec = {
            'mark': 'point',
            'encoding': {'row': row_channel},
        }
        query = Query.from_vegalite(spec)

        assert is_valid(Task(data, query), True) == False
Exemple #7
0
    def test_only_one_agg(self):
        '''A point chart with a raw x and a mean-aggregated y must be invalid.'''
        x_channel = {'type': 'quantitative', 'field': 'q1'}
        y_channel = {
            'type': 'quantitative',
            'field': 'q2',
            'aggregate': 'mean',
        }
        spec = {
            'mark': 'point',
            'encoding': {'x': x_channel, 'y': y_channel},
        }
        query = Query.from_vegalite(spec)

        assert is_valid(Task(data, query), True) == False
Exemple #8
0
    def test_heatmap(self):
        '''A rect mark with a nominal x and a binned ordinal y must be valid.'''
        x_channel = {'type': 'nominal', 'field': 'n1'}
        y_channel = {
            'type': 'ordinal',
            'field': 'q1',
            'bin': True,
        }
        spec = {
            'mark': 'rect',
            'encoding': {'x': x_channel, 'y': y_channel},
        }
        query = Query.from_vegalite(spec)

        assert is_valid(Task(data, query), True) == True
Exemple #9
0
    def test_hist(self):
        '''A bar chart with a binned quantitative x and a count on y must be valid.'''
        x_channel = {
            'type': 'quantitative',
            'field': 'q1',
            'bin': True,
        }
        y_channel = {'type': 'quantitative', 'aggregate': 'count'}
        spec = {
            'mark': 'bar',
            'encoding': {'x': x_channel, 'y': y_channel},
        }
        query = Query.from_vegalite(spec)

        assert is_valid(Task(data, query), True) == True
Exemple #10
0
def generate_visual_pairs(partial_full_data, weights):
    '''
    Generate pairs that can be visualized by bug finders.

    For every entry of `partial_full_data` (a mapping whose values are
    `(partial_spec, full_spec)` pairs) draco is run on the partial spec with
    `weights` passed as constants. Each entry of the returned "specs" list
    holds the draco result as "first" (None when no spec was found), the
    given full spec as "second" and the partial spec under
    "properties" -> "input".
    '''
    pairs = []

    for case in partial_full_data:
        partial_spec, full_spec = partial_full_data[case]

        draco_rec = run(Task.from_cql(partial_spec), constants=weights)

        if draco_rec is None:
            logger.warning(f'Could not find a spec for {partial_spec}')
            draco_spec = None
        else:
            draco_spec = draco_rec.to_vegalite()

        pairs.append({
            "first": draco_spec,
            "second": full_spec,
            "properties": {
                "input": partial_spec
            }
        })

    headers = {
        "first": {"title": "Draco", "subtitle": "Draco Prediction"},
        "second": {"title": "CQL", "subtitle": "Compassql Prediction"},
    }

    return {"headers": headers, "specs": pairs}
Exemple #11
0
def test_count_violations():
    '''A mean-horsepower-by-origin bar chart on the cars data reports 2 'encoding' violations.'''
    cars = Data.from_csv('examples/data/cars.csv')

    x_channel = {'field': 'origin', 'type': 'ordinal'}
    y_channel = {
        'field': 'horsepower',
        'type': 'quantitative',
        'aggregate': 'mean',
    }
    spec = {
        'mark': 'bar',
        'data': {'url': 'data/cars.csv'},
        'encoding': {'x': x_channel, 'y': y_channel},
    }

    task = Task(cars, Query.from_vegalite(spec))
    violations = count_violations(task)

    assert 'encoding' in violations.keys()
    assert violations.get('encoding') == 2
Exemple #12
0
def run_draco(task: Task, constants: Dict[str, str] = None, files: List[str] = None, silence_warnings=False, debug=False) -> Tuple[str, str]:
    '''
    Run draco and return stderr and stdout

    The program fed to clingo on stdin is the content of `files` (resolved
    relative to DRACO_LP_DIR, defaulting to DRACO_LP) followed by the ASP
    encoding of `task`. Every entry of `constants` is passed to clingo as a
    `-c name=value` option.

    With `debug` set, the task program alone is written to a temporary file
    that is kept on disk, and a clingo command line for reproducing the run
    is logged.

    Note the order of the returned pair: (stderr, stdout). The pipes are not
    opened in text mode, so both values are bytes despite the annotation.
    '''

    # default args
    files = files or DRACO_LP
    constants = constants or {}

    options = ['--outf=2', '--quiet=1,2,2']
    if silence_warnings:
        options.append('--warn=no-atom-undefined')
    options.extend(f'-c {name}={value}' for name, value in constants.items())

    cmd = ['clingo'] + options
    logger.debug('Command: %s', ' '.join(cmd))

    # Assemble the full program before spawning clingo: if building it fails
    # (unreadable file, task that cannot be encoded) no child process is left
    # behind waiting on stdin.
    task_program = task.to_asp()
    file_names = [os.path.join(DRACO_LP_DIR, f) for f in files]
    asp_program = b'\n'.join(map(load_file, file_names)) + task_program.encode('utf8')

    if debug:
        # delete=False: the file must outlive this call so it can be debugged.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as fd:
            fd.write(task_program)

            logger.info('Debug ASP with "clingo %s %s"', ' '.join(file_names), fd.name)

    proc = subprocess.Popen(
        args=cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    stdout, stderr = proc.communicate(asp_program)

    return (stderr, stdout)
Exemple #13
0
    def test_stack_q_q(self):
        '''A stacked area over two quantitative fields (x scale not forced to zero), colored by a nominal field, must be valid.'''
        x_channel = {
            'type': 'quantitative',
            'field': 'q1',
            'scale': {'zero': False},
        }
        y_channel = {
            'type': 'quantitative',
            'field': 'q2',
            'stack': 'zero',
        }
        color_channel = {'type': 'nominal', 'field': 'n1'}

        spec = {
            'mark': 'area',
            'encoding': {
                'x': x_channel,
                'y': y_channel,
                'color': color_channel,
            },
        }
        query = Query.from_vegalite(spec)

        assert is_valid(Task(data, query), True) == True