Beispiel #1
0
def populate(limit):
    """Store every dataset pair together with the distance between them.

    The datasets returned by ``get_datasets(limit)`` are combined into
    pairs by ``get_pairs``. For each pair the distance is computed with
    ``compare_dataset``, both members are saved as ``Dataset`` objects and
    the first is connected to the second with that distance.

    Args:
        limit: Passed through unchanged to ``get_datasets``.
    """
    for pair in get_pairs(get_datasets(limit)):
        first, second = pair[0], pair[1]

        for entry in (first, second):
            print(f"Adding database {entry['name']}.")

        distance = compare_dataset(first, second)
        print(f"Distance between {first['name']} and {second['name']} is {distance}.")

        # Build both objects before saving either, then link first -> second.
        nodes = [
            Dataset(did=entry['did'],
                    name=entry['name'],
                    file_format=entry['format'])
            for entry in (first, second)
        ]
        for node in nodes:
            node.save()
        nodes[0].add_connections(nodes[1], distance)
Beispiel #2
0
def populate(limit):
    """Store dataset pairs and their distance, skipping pairs already stored.

    The datasets returned by ``get_datasets(limit)`` are combined into
    pairs by ``get_pairs``. A pair is skipped when ``fetch()`` yields a
    non-``None`` result for both of its members; otherwise the distance is
    computed with ``compare_dataset``, both members are saved and the
    first is connected to the second with that distance.

    Args:
        limit: Passed through unchanged to ``get_datasets``.
    """
    datasets = get_datasets(limit)
    pairs = get_pairs(datasets)

    for pair in pairs:
        d1, d2 = pair[0], pair[1]

        # Identity check against None: ``!= None`` would go through any
        # custom ``__ne__``/``__eq__`` defined on the fetched object.
        # The second fetch only runs when the first one found something.
        if (Dataset(did=d1['did']).fetch() is not None
                and Dataset(did=d2['did']).fetch() is not None):
            print(
                f"Datasets {d1['name']} and {d2['name']} already in database.")
            continue

        # At least one member is missing: compute the distance and
        # (re)store both members together with their connection.
        distance = compare_dataset(d1, d2)

        print(
            f"Distance between {d1['name']} and {d2['name']} is {distance}."
        )

        dataset_1 = Dataset(did=d1['did'],
                            name=d1['name'],
                            file_format=d1['format'])
        dataset_2 = Dataset(did=d2['did'],
                            name=d2['name'],
                            file_format=d2['format'])
        dataset_1.save()
        dataset_2.save()
        dataset_1.add_connections(dataset_2, distance)
Beispiel #3
0
    def mutate(self, info, did):
        """Ensure the dataset ``did`` is stored and connected, then report success.

        Args:
            info: Not used by this method.
            did: Dataset id, passed to ``openml.datasets.get_dataset`` and
                used as the ``did`` of the ``Dataset`` object.

        Returns:
            ``AddDataset`` built with the stored dataset and ``ok=True``.
        """
        # NOTE(review): the OpenML lookup runs even when the dataset is
        # already stored, in which case its result is unused. Confirm
        # whether anything relies on this call before deferring it.
        remote = openml.datasets.get_dataset(did)

        record = Dataset(did=did).fetch()
        if record is None:
            # Not stored yet: create it from the OpenML metadata.
            record = Dataset(did=did,
                             name=remote.name,
                             file_format=remote.format)
            record.save()

        # New and pre-existing datasets are both (re)connected.
        record.connect_all()
        return AddDataset(dataset=record, ok=True)
Beispiel #4
0
def populate_datasets(limit):
    """Store the datasets from ``get_datasets(limit)`` and connect them.

    For each dataset:

    * If it is already among the stored datasets, every stored dataset it
      is not yet connected to gets a connection whose distance comes from
      ``compare_dataset``.
    * Otherwise a new ``Dataset`` is created, saved and connected to every
      dataset that was stored at that point.

    Args:
        limit: Passed through unchanged to ``get_datasets``.
    """
    datasets = get_datasets(limit)

    for dataset in datasets:
        # Re-read the stored datasets on every pass; presumably earlier
        # iterations have saved new ones that must be taken into account.
        current = list(Dataset().all)
        dataset_obj = Dataset(did=dataset['did']).fetch()

        if dataset_obj in current:
            connections = dataset_obj.get_connections()
            others = current[:]
            others.remove(dataset_obj)
            unconnected = set(others) - set(connections)
            # Any missing connection must be added, including a single one.
            if unconnected:
                for to_connect in unconnected:
                    print(f"Connecting {to_connect} to {dataset_obj}.")
                    distance = compare_dataset(dataset['did'], to_connect.did)
                    dataset_obj.add_connections(to_connect, distance)
                    dataset_obj.save()
            else:
                print(f"Dataset {dataset_obj} is fully connected.")
        else:
            new = Dataset(
                did=dataset['did'],
                name=dataset['name'],
                file_format=dataset['format'],
            )
            print(f"Created new {new}.")
            new.save()
            for to_connect in current:
                print(f"Connecting {to_connect} to {new}.")
                distance = compare_dataset(dataset['did'], to_connect.did)
                # Link to the other dataset, not to the new dataset itself.
                new.add_connections(to_connect, distance)
                new.save()