Ejemplo n.º 1
0
def load_neg_pos_specs() -> Dict[str, PosNegExample]:
    """Load the positive/negative example pairs from the labelled JSON files.

    Reads, in order, the files at ``man_data_path``, ``yh_data_path``,
    ``ba_data_path`` and ``label_data_path``. Each file is expected to be a
    JSON object with a ``source`` entry and a ``data`` list. Every row of
    ``data`` must provide ``fields``, ``negative`` and ``positive``;
    ``num_rows`` and ``task`` are optional and default to ``None``.

    Returns:
        A dict mapping ``'<source>-<index>'`` to the ``PosNegExample`` built
        from that row, where ``index`` is the row's 0-based position within
        its file. Rows from files that share a ``source`` value and an index
        overwrite earlier entries.
    """
    raw_data = {}

    for path in [man_data_path, yh_data_path, ba_data_path, label_data_path]:
        # JSON text is UTF-8 by specification; do not rely on the locale's
        # default encoding, which differs between platforms.
        with open(path, encoding='utf-8') as f:
            json_data = json.load(f)

        # The file is fully parsed at this point, so it can already be closed.
        for i, row in enumerate(json_data['data']):
            fields = list(map(Field.from_obj, row['fields']))
            spec_schema = Data(fields, row.get('num_rows'))
            src = json_data['source']

            key = f'{src}-{i}'
            raw_data[key] = PosNegExample(
                key,
                spec_schema,
                row.get('task'),
                src,
                row['negative'],
                row['positive']
            )

    return raw_data
Ejemplo n.º 2
0
def load_neg_pos_specs() -> Dict[str, PosNegExample]:
    """Load the positive/negative example pairs from the labelled JSON files.

    Reads, in order, the files at ``man_data_path``, ``yh_data_path``,
    ``ba_data_path`` and ``label_data_path``. Each file is expected to be a
    JSON object with a ``source`` entry and a ``data`` list. Every row of
    ``data`` must provide ``fields``, ``negative`` and ``positive``;
    ``num_rows`` and ``task`` are optional and default to ``None``.

    Returns:
        A dict mapping ``"<source>-<index>"`` to the ``PosNegExample`` built
        from that row, where ``index`` is the row's 0-based position within
        its file. Rows from files that share a ``source`` value and an index
        overwrite earlier entries.
    """
    raw_data = {}

    for path in [man_data_path, yh_data_path, ba_data_path, label_data_path]:
        # JSON text is UTF-8 by specification; do not rely on the locale's
        # default encoding, which differs between platforms.
        with open(path, encoding="utf-8") as f:
            json_data = json.load(f)

        # The file is fully parsed at this point, so it can already be closed.
        for i, row in enumerate(json_data["data"]):
            fields = list(map(Field.from_obj, row["fields"]))
            spec_schema = Data(fields, row.get("num_rows"))
            src = json_data["source"]

            key = f"{src}-{i}"
            raw_data[key] = PosNegExample(
                key,
                spec_schema,
                row.get("task"),
                src,
                row["negative"],
                row["positive"],
            )

    return raw_data
Ejemplo n.º 3
0
 def acquire_data(url):
     """Return the ``Data`` object for *url*, loading it on first use.

     On a cache miss the data is read with ``Data.from_json`` from the file
     in ``data_dir`` whose name is the basename of *url*, and the result is
     stored in ``data_cache`` under *url*.
     """
     if url in data_cache:
         return data_cache[url]

     local_path = os.path.join(data_dir, os.path.basename(url))
     loaded = Data.from_json(local_path)
     data_cache[url] = loaded
     # Restore the short name: loading from disk set the url to the full
     # path on the current machine.
     loaded.url = url
     return loaded
Ejemplo n.º 4
0
    def __init__(self, distributions: Dict, type_distribution: Dict,
                 definitions: Dict, data_schema: Dict, data_url: str) -> None:
        """Set up the model and the data description for this instance.

        Args:
            distributions: forwarded unchanged to ``Model``.
            type_distribution: forwarded unchanged to ``Model``.
            definitions: must contain the keys ``'topLevelProps'`` and
                ``'encodingProps'``; both values are forwarded to ``Model``.
            data_schema: iterated once; every element must provide the keys
                ``'name'``, ``'type'`` and ``'cardinality'``.
            data_url: stored as ``self.data_url``.
        """
        # Read both property sets first so a malformed ``definitions`` is
        # reported before any Field is constructed.
        props_top_level = definitions['topLevelProps']
        props_encoding = definitions['encodingProps']

        schema_fields = []
        for column in data_schema:
            schema_fields.append(
                Field(
                    column['name'],
                    column['type'],
                    cardinality=column['cardinality'],
                )
            )

        # The same field list is handed to both the model and the data.
        self.model = Model(
            schema_fields,
            distributions,
            type_distribution,
            props_top_level,
            props_encoding,
        )
        self.data = Data(schema_fields)
        self.data_url = data_url
Ejemplo n.º 5
0
def test_is_valid():
    """Check ``is_valid`` on two specs that differ only in their mark.

    Both specs encode the number field ``foo`` as quantitative on ``x``;
    the ``text`` mark is expected to be rejected and the ``point`` mark
    accepted.
    """
    data = Data(fields=[Field('foo', 'number')])

    invalid = Query.from_vegalite({
        'mark': 'text',
        'encoding': {
            'x': {'field': 'foo', 'type': 'quantitative'}
        }
    })

    # Rely on truthiness instead of comparing against a boolean literal.
    assert not is_valid(Task(data, invalid))

    valid = Query.from_vegalite({
        'mark': 'point',
        'encoding': {
            'x': {'field': 'foo', 'type': 'quantitative'}
        }
    })
    assert is_valid(Task(data, valid))
Ejemplo n.º 6
0
def test_count_violations():
    """Check that ``count_violations`` reports two ``encoding`` violations.

    The query is a bar chart over the cars data with ``origin`` as an
    ordinal ``x`` and the mean of ``horsepower`` as a quantitative ``y``.
    """
    data = Data.from_csv('examples/data/cars.csv')
    query_json = {
        'mark': 'bar',
        'data': {
            'url': 'data/cars.csv'
        },
        'encoding': {
            'x': {
                'field': 'origin',
                'type': 'ordinal'
            },
            'y': {
                'field': 'horsepower',
                'type': 'quantitative',
                'aggregate': 'mean'
            }
        }
    }
    violations = count_violations(Task(data, Query.from_vegalite(query_json)))

    # Membership is tested on the mapping itself; no need for ``.keys()``.
    assert 'encoding' in violations
    assert violations['encoding'] == 2
Ejemplo n.º 7
0
    def __init__(
        self,
        distributions: Dict,
        type_distribution: Dict,
        definitions: Dict,
        data_schema: Dict,
        data_url: str,
    ) -> None:
        """Set up the model and the data description for this instance.

        Args:
            distributions: forwarded unchanged to ``Model``.
            type_distribution: forwarded unchanged to ``Model``.
            definitions: must contain the keys ``"topLevelProps"`` and
                ``"encodingProps"``; both values are forwarded to ``Model``.
            data_schema: iterated once; every element must provide the keys
                ``"name"``, ``"type"`` and ``"cardinality"``.
            data_url: stored as ``self.data_url``.
        """
        # Read both property sets first so a malformed ``definitions`` is
        # reported before any Field is constructed.
        props_top_level = definitions["topLevelProps"]
        props_encoding = definitions["encodingProps"]

        schema_fields = []
        for column in data_schema:
            schema_fields.append(
                Field(
                    column["name"],
                    column["type"],
                    cardinality=column["cardinality"],
                )
            )

        # The same field list is handed to both the model and the data.
        self.model = Model(
            schema_fields,
            distributions,
            type_distribution,
            props_top_level,
            props_encoding,
        )
        self.data = Data(schema_fields)
        self.data_url = data_url
Ejemplo n.º 8
0
def get_rec(data, query):
    """Run the solver on *data* with a query given in ``Query.from_obj`` form.

    Returns whatever ``run`` returns for the resulting ``Task``.
    """
    parsed_query = Query.from_obj(query)
    return run(Task(data, parsed_query))


def run_spec(data, spec):
    """Run the solver on *data* with a query parsed by ``Query.from_vegalite``.

    Returns whatever ``run`` returns for the resulting ``Task``.
    """
    parsed_query = Query.from_vegalite(spec)
    return run(Task(data, parsed_query))


# Module-level schema: three number fields (q1, q2, o1) and one string
# field (n1).
# NOTE(review): the trailing positional Field arguments and the 100 passed
# to Data are presumably cardinality/entropy and the row count -- confirm
# against the Field and Data signatures.
spec_schema = Data(
    [
        Field('q1', 'number', 100, 1),
        Field('q2', 'number', 100, 1),
        Field('o1', 'number', 6, 1),
        Field('n1', 'string', 5, 1),
    ],
    100,
    url='data.csv',
)


class TestSpecs:
    """Tests that request recommendations for partial specs via ``get_rec``."""

    def test_scatter(self):
        """Request a recommendation for two fields, only one bound to ``x``."""
        partial_query = {
            'encoding': [
                {'channel': 'x', 'field': 'q1'},
                {'field': 'q2'},
            ]
        }
        # NOTE(review): the result is not asserted on in the visible code.
        recommendation = get_rec(spec_schema, partial_query).to_vegalite()
Ejemplo n.º 9
0
from draco.spec import Data, Field, Query, Task
from draco.generation.helper import is_valid

# Module-level schema: two string fields (n1, n2) and three number fields
# (q1, q2, q3).
data = Data(
    fields=[
        Field('n1', 'string'),
        Field('n2', 'string'),
        Field('q1', 'number'),
        Field('q2', 'number'),
        Field('q3', 'number'),
    ]
)


class TestValidSpecs():
    """Specs that ``is_valid`` is expected to accept."""

    def test_hist(self):
        """A bar chart of binned ``q1`` on ``x`` against a count on ``y``."""
        query = Query.from_vegalite({
            'mark': 'bar',
            'encoding': {
                'x': {
                    'type': 'quantitative',
                    'field': 'q1',
                    'bin': True
                },
                'y': {
                    'type': 'quantitative',
                    'aggregate': 'count'
                }
            }
        })

        # Rely on truthiness instead of comparing against ``True``.
        # NOTE(review): the second positional argument is presumably a
        # debug flag -- confirm against is_valid's signature.
        assert is_valid(Task(data, query), True)
Ejemplo n.º 10
0
 def test_generate(self):
     """Check that ``fill_with_random_content`` populates ``Data.content``.

     ``content`` must be ``None`` on a freshly built ``Data`` and something
     other than ``None`` once it has been filled.
     """
     data = Data(fields=[Field('foo', 'number', 10, 2.1, [1, 2])])
     # Compare to None by identity: ``==`` / ``!=`` would dispatch to the
     # content object's own comparison methods, which need not return a
     # plain bool.
     assert data.content is None
     data.fill_with_random_content()
     assert data.content is not None
Ejemplo n.º 11
0
    def test_to_asp(self):
        """Check the ASP text that ``Data.to_asp`` emits for one number field."""
        data = Data([Field('foo', 'number', 10, 0.4)], 42)

        # One fact per line; the row count comes first, followed by a blank
        # line and then the per-field facts.
        expected = (
            'num_rows(42).\n\n'
            'fieldtype(foo,number).\n'
            'cardinality(foo,10).\n'
            'entropy(foo,4).\n'
        )
        assert data.to_asp() == expected