Exemple #1
0
def populate(limit):
    """Save every dataset pair from ``get_datasets(limit)`` with its distance.

    For each pair produced by ``get_pairs``, the distance between the two
    entries is computed with ``compare_dataset``, both entries are saved as
    ``Dataset`` objects, and the first is connected to the second using that
    distance. Progress is printed to stdout.

    Args:
        limit: Passed through unchanged to ``get_datasets``.
    """
    for pair in get_pairs(get_datasets(limit)):
        left, right = pair[0], pair[1]

        for entry in (left, right):
            print(f"Adding database {entry['name']}.")

        gap = compare_dataset(left, right)
        print(f"Distance between {left['name']} and {right['name']} is {gap}.")

        # Build both records before saving either, then link first -> second.
        source, target = (
            Dataset(did=entry['did'],
                    name=entry['name'],
                    file_format=entry['format'])
            for entry in (left, right)
        )
        source.save()
        target.save()
        source.add_connections(target, gap)
    def mutate(self, info, did):
        """Ensure the dataset ``did`` is stored and connected, then report it.

        The dataset is looked up with ``Dataset(did=did).fetch()``. When that
        yields ``None``, its name and format are read from OpenML and a new
        ``Dataset`` is saved. In both cases ``connect_all()`` is then called
        on the resulting dataset.

        Args:
            info: Not used by this method.
            did: Dataset id used for the stored lookup and, if needed, for
                the OpenML lookup.

        Returns:
            An ``AddDataset`` carrying the dataset, with ``ok=True``.
        """
        dataset = Dataset(did=did).fetch()
        if dataset is None:
            # Query OpenML only when its metadata is actually needed; an
            # already-stored dataset requires no remote lookup.
            openml_dataset = openml.datasets.get_dataset(did)
            dataset = Dataset(did=did,
                              name=openml_dataset.name,
                              file_format=openml_dataset.format)
            dataset.save()

        dataset.connect_all()

        return AddDataset(dataset=dataset, ok=True)
Exemple #3
0
def populate_tasks():
    """Attach the tasks reported by ``get_tasks`` to every stored dataset.

    For each dataset in ``Dataset().all``, every entry returned by
    ``get_tasks(dataset.did)`` is looked up by its ``tid``. When the fetched
    task is not among the dataset's current tasks, a new ``Task`` is built
    from the entry, saved, and added to the dataset. Progress is printed to
    stdout.
    """
    for stored in list(Dataset().all):
        attached = stored.get_tasks()
        print(f"On dataset {stored}")

        for spec in get_tasks(stored.did):
            # Nothing to do when the fetched task is already attached.
            if Task(tid=spec['tid']).fetch() in attached:
                continue

            fresh = Task(
                tid=spec['tid'],
                task_type=spec['task_type'],
                task_type_id=spec['ttid'],
            )
            # Copy over every entry field that the Task object exposes.
            for field, payload in spec.items():
                if not hasattr(fresh, field):
                    continue
                setattr(fresh, field, payload)

            fresh.save()
            print(f"Adding new task {fresh}")
            stored.add_task(fresh)
Exemple #4
0
def populate_datasets(limit):
    """Store the datasets from ``get_datasets(limit)`` and connect them.

    Each entry is looked up by its ``did``:

    * If the fetched dataset is already among the stored ones, it is
      connected to every other stored dataset that is missing from its
      current connections.
    * Otherwise a new ``Dataset`` is created, saved, and connected to every
      dataset that was stored before it.

    Distances come from ``compare_dataset``. Progress is printed to stdout.

    Args:
        limit: Passed through unchanged to ``get_datasets``.
    """
    datasets = get_datasets(limit)

    for dataset in datasets:
        # Re-read the stored datasets on every iteration so that datasets
        # saved earlier in this loop are taken into account.
        current = list(Dataset().all)
        dataset_obj = Dataset(did=dataset['did']).fetch()

        if dataset_obj in current:
            connections = dataset_obj.get_connections()
            others = current[:]
            others.remove(dataset_obj)
            unconnected = list(set(others) - set(connections))
            # A single missing connection must be handled as well.
            if unconnected:
                for to_connect in unconnected:
                    print(f"Connecting {to_connect} to {dataset_obj}.")
                    # NOTE(review): populate() passes whole dataset dicts to
                    # compare_dataset, while ids are passed here -- confirm
                    # that both forms are supported.
                    distance = compare_dataset(dataset['did'], to_connect.did)
                    dataset_obj.add_connections(to_connect, distance)
                    dataset_obj.save()
            else:
                print(f"Dataset {dataset_obj} is fully connected.")
        else:
            new = Dataset(
                did=dataset['did'],
                name=dataset['name'],
                file_format=dataset['format'],
            )
            print(f"Created new {new}.")
            new.save()
            for to_connect in current:
                print(f"Connecting {to_connect} to {new}.")
                distance = compare_dataset(dataset['did'], to_connect.did)
                # Link to the other dataset, not to the new dataset itself.
                new.add_connections(to_connect, distance)
                new.save()
Exemple #5
0
def populate(limit):
    """Save dataset pairs from ``get_datasets(limit)`` that are not stored yet.

    For each pair produced by ``get_pairs``, the pair is skipped when both
    entries can already be fetched by their ``did``. Otherwise the distance
    between the two entries is computed with ``compare_dataset``, both are
    saved as ``Dataset`` objects, and the first is connected to the second
    using that distance. Progress is printed to stdout.

    Args:
        limit: Passed through unchanged to ``get_datasets``.
    """
    datasets = get_datasets(limit)
    pairs = get_pairs(datasets)

    for pair in pairs:
        d1 = pair[0]
        d2 = pair[1]

        # NOTE(review): a pair whose two datasets are both stored is skipped
        # without checking whether they are connected -- confirm intended.
        if (Dataset(did=d1['did']).fetch() is not None
                and Dataset(did=d2['did']).fetch() is not None):
            print(
                f"Datasets {d1['name']} and {d2['name']} already in database.")
            continue

        distance = compare_dataset(d1, d2)

        print(
            f"Distance between {d1['name']} and {d2['name']} is {distance}."
        )

        dataset_1 = Dataset(did=d1['did'],
                            name=d1['name'],
                            file_format=d1['format'])
        dataset_2 = Dataset(did=d2['did'],
                            name=d2['name'],
                            file_format=d2['format'])
        dataset_1.save()
        dataset_2.save()
        dataset_1.add_connections(dataset_2, distance)
 def resolve_similar_tasks(self, info, **kwargs):
     """Return a ``TaskSchema`` for each task from ``get_similar_tasks``.

     ``did`` and ``task_type_id`` are read from ``kwargs`` (``None`` when
     absent). The dataset fetched for ``did`` is asked for its similar tasks
     of that type, and each result's ``as_dict()`` is wrapped in a
     ``TaskSchema``.
     """
     source = Dataset(did=kwargs.get('did')).fetch()
     matches = source.get_similar_tasks(kwargs.get('task_type_id'))

     schemas = []
     for match in matches:
         schemas.append(TaskSchema(**match.as_dict()))
     return schemas
 def resolve_close_connections(self, info, **kwargs):
     """Return ``get_close_connections`` of the dataset identified by ``did``.

     ``did`` and ``distance`` are read from ``kwargs`` (``None`` when
     absent); ``distance`` is forwarded unchanged to the fetched dataset.
     """
     origin = Dataset(did=kwargs.get('did')).fetch()
     return origin.get_close_connections(kwargs.get('distance'))
 def resolve_datasets(self, info):
     """Return a ``DatasetSchema`` for every entry in ``Dataset().all``."""
     schemas = []
     for stored in Dataset().all:
         schemas.append(DatasetSchema(**stored.as_dict()))
     return schemas
 def resolve_dataset(self, info, did):
     """Return a ``DatasetSchema`` built from the dataset fetched for ``did``."""
     record = Dataset(did=did).fetch()
     fields = record.as_dict()
     return DatasetSchema(**fields)
Exemple #10
0
 def __init__(self, **kwargs):
     """Run the parent initialiser, then store the dataset for ``self.did``.

     All keyword arguments are forwarded to the parent ``__init__``.
     ``self.did`` is read afterwards, so it must be available by then
     (presumably set by the parent initialiser -- not visible here).
     """
     super().__init__(**kwargs)
     lookup = Dataset(did=self.did)
     self.dataset = lookup.fetch()