def populate(limit):
    """Store each dataset pair and the distance between its two members.

    The pairs come from ``get_pairs(get_datasets(limit))``. For every pair
    both datasets are saved as ``Dataset`` objects and then linked through
    ``add_connections`` with the value returned by ``compare_dataset``.

    Args:
        limit: Forwarded unchanged to ``get_datasets``.
    """
    for pair in get_pairs(get_datasets(limit)):
        d1, d2 = pair[0], pair[1]
        print(f"Adding database {d1['name']}.")
        print(f"Adding database {d2['name']}.")

        distance = compare_dataset(d1, d2)
        print(f"Distance between {d1['name']} and {d2['name']} is {distance}.")

        # Build both objects first, then persist them in order, then link.
        source, target = (
            Dataset(did=d['did'], name=d['name'], file_format=d['format'])
            for d in (d1, d2)
        )
        source.save()
        target.save()
        source.add_connections(target, distance)
def mutate(self, info, did):
    """Make sure the dataset ``did`` is stored and connected, then report success.

    The OpenML dataset is always looked up first. If no stored ``Dataset``
    is found for ``did``, one is created from the OpenML name and format and
    saved. In both cases ``connect_all()`` is called on the dataset.

    Args:
        info: Resolver context; not used here.
        did: Dataset identifier passed to OpenML and to ``Dataset``.

    Returns:
        ``AddDataset`` carrying the dataset and ``ok=True``.
    """
    remote = openml.datasets.get_dataset(did)
    stored = Dataset(did=did).fetch()

    if stored is None:
        stored = Dataset(did=did, name=remote.name, file_format=remote.format)
        stored.save()

    # Both the "new" and the "already stored" path end with connect_all().
    stored.connect_all()
    return AddDataset(dataset=stored, ok=True)
def populate_tasks():
    """Attach tasks returned by ``get_tasks`` to every stored dataset.

    For each dataset in ``Dataset().all`` the tasks it already has
    (``dataset.get_tasks()``) are compared with those from
    ``get_tasks(dataset.did)``. A task not yet among the dataset's tasks is
    created, populated, saved and added to the dataset.
    """
    # Materialise the collection before iterating, as the original does.
    for dataset in list(Dataset().all):
        linked = dataset.get_tasks()
        print(f"On dataset {dataset}")

        for task_info in get_tasks(dataset.did):
            task_obj = Task(tid=task_info['tid']).fetch()
            if task_obj in linked:
                continue

            task_obj = Task(
                tid=task_info['tid'],
                task_type=task_info['task_type'],
                task_type_id=task_info['ttid'],
            )
            # Copy over every entry whose key matches an existing attribute.
            for field, field_value in task_info.items():
                if hasattr(task_obj, field):
                    setattr(task_obj, field, field_value)

            task_obj.save()
            print(f"Adding new task {task_obj}")
            dataset.add_task(task_obj)
def populate_datasets(limit):
    """Store the datasets from ``get_datasets(limit)`` and connect them.

    For every dataset description returned by ``get_datasets``:

    * if a matching ``Dataset`` is already stored, it is connected to every
      other stored dataset it is not yet connected to;
    * otherwise a new ``Dataset`` is created, saved, and connected to every
      dataset stored so far.

    The connection weight is the value returned by ``compare_dataset`` for
    the two dataset ids.

    Args:
        limit: Forwarded unchanged to ``get_datasets``.
    """
    # Fetched once; the original re-fetched this list on every iteration
    # without ever using the result.
    datasets = get_datasets(limit)

    for dataset in datasets:
        # Re-read the stored datasets each time: earlier iterations may have
        # saved new ones.
        current = list(Dataset().all)
        dataset_obj = Dataset(did=dataset['did']).fetch()

        if dataset_obj in current:
            connections = dataset_obj.get_connections()
            others = current[:]
            others.remove(dataset_obj)
            unconnected = list(set(others) - set(connections))

            # Any non-empty remainder must be connected. The original used
            # ``> 1`` and so reported a dataset with exactly one missing
            # connection as fully connected.
            if unconnected:
                for to_connect in unconnected:
                    print(f"Connecting {to_connect} to {dataset_obj}.")
                    distance = compare_dataset(dataset['did'], to_connect.did)
                    dataset_obj.add_connections(to_connect, distance)
                    dataset_obj.save()
            else:
                print(f"Dataset {dataset_obj} is fully connected.")
        else:
            new = Dataset(
                did=dataset['did'],
                name=dataset['name'],
                file_format=dataset['format'],
            )
            print(f"Created new {new}.")
            new.save()

            for to_connect in current:
                print(f"Connecting {to_connect} to {new}.")
                distance = compare_dataset(dataset['did'], to_connect.did)
                # Link to the other dataset; the original passed ``new``
                # itself and so created a self-connection.
                new.add_connections(to_connect, distance)
                new.save()
def populate(limit):
    """Store dataset pairs that are not yet both present, with their distance.

    Pairs come from ``get_pairs(get_datasets(limit))``. A pair is skipped
    when both of its datasets can already be fetched. Otherwise the distance
    is computed with ``compare_dataset``, both datasets are saved, and the
    first is connected to the second with that distance.

    Args:
        limit: Forwarded unchanged to ``get_datasets``.
    """
    datasets = get_datasets(limit)
    pairs = get_pairs(datasets)

    for pair in pairs:
        d1 = pair[0]
        d2 = pair[1]

        # Identity comparison with None (PEP 8); the second fetch is still
        # skipped when the first dataset is missing.
        if (Dataset(did=d1['did']).fetch() is not None
                and Dataset(did=d2['did']).fetch() is not None):
            print(
                f"Datasets {d1['name']} and {d2['name']} already in database.")
            continue

        distance = compare_dataset(d1, d2)
        print(
            f"Distance between {d1['name']} and {d2['name']} is {distance}."
        )

        dataset_1 = Dataset(did=d1['did'],
                            name=d1['name'],
                            file_format=d1['format'])
        dataset_2 = Dataset(did=d2['did'],
                            name=d2['name'],
                            file_format=d2['format'])
        dataset_1.save()
        dataset_2.save()
        dataset_1.add_connections(dataset_2, distance)
def resolve_similar_tasks(self, info, **kwargs):
    """Return the similar tasks of a dataset as ``TaskSchema`` objects.

    Args:
        info: Resolver context; not used here.
        **kwargs: Reads ``did`` (dataset id) and ``task_type_id``; a missing
            key is passed on as ``None``.

    Returns:
        A list with one ``TaskSchema`` per task returned by
        ``get_similar_tasks``.
    """
    # NOTE(review): if fetch() yields None, the next call raises
    # AttributeError; confirm whether that is the intended failure mode.
    source = Dataset(did=kwargs.get('did')).fetch()
    similar = source.get_similar_tasks(kwargs.get('task_type_id'))
    return [TaskSchema(**entry.as_dict()) for entry in similar]
def resolve_close_connections(self, info, **kwargs):
    """Return the close connections of the dataset identified by ``did``.

    Args:
        info: Resolver context; not used here.
        **kwargs: Reads ``did`` (dataset id) and ``distance``; a missing key
            is passed on as ``None``.

    Returns:
        Whatever ``get_close_connections(distance)`` returns for the fetched
        dataset.
    """
    dataset = Dataset(did=kwargs.get('did')).fetch()
    return dataset.get_close_connections(kwargs.get('distance'))
def resolve_datasets(self, info):
    """Return every stored dataset as a ``DatasetSchema``.

    Args:
        info: Resolver context; not used here.

    Returns:
        A list with one ``DatasetSchema`` per item in ``Dataset().all``,
        each built from that item's ``as_dict()``.
    """
    stored = Dataset().all
    return [DatasetSchema(**node.as_dict()) for node in stored]
def resolve_dataset(self, info, did):
    """Return the dataset identified by ``did`` as a ``DatasetSchema``.

    Args:
        info: Resolver context; not used here.
        did: Dataset identifier used for the lookup.

    Returns:
        A ``DatasetSchema`` built from the fetched dataset's ``as_dict()``.
    """
    # NOTE(review): if fetch() yields None, as_dict() raises AttributeError;
    # confirm whether unknown ids should be handled differently.
    fields = Dataset(did=did).fetch().as_dict()
    return DatasetSchema(**fields)
def __init__(self, **kwargs):
    """Initialise the parent class, then attach the matching dataset.

    Args:
        **kwargs: Forwarded unchanged to the parent initialiser.
    """
    super().__init__(**kwargs)
    # self.did must be available once the parent initialiser has run.
    lookup = Dataset(did=self.did)
    self.dataset = lookup.fetch()