Beispiel #1
0
def mutual_information_feature_selection(dataset,
                                         features,
                                         target,
                                         num_reads=5000):
    """Run the MIFS algorithm on a QPU solver for every subset size.

    Args:
        dataset: Data handed unchanged to ``mutual_information_bqm``.
        features: Sequence of feature labels; each label is looked up in the
            returned sample, so they must be the BQM's variable names.
        target: Target label handed unchanged to ``mutual_information_bqm``.
        num_reads: Number of reads requested from the sampler for each
            subset size.

    Returns:
        A ``len(features) x len(features)`` NumPy array. Row ``k - 1`` holds,
        for each feature (in the order of ``features``), the value it takes in
        the first (lowest-energy) sample of the BQM penalized toward
        selecting ``k`` features.
    """

    # QPU sampler that embeds onto a fully-connected graph of all the variables
    sampler = DWaveCliqueSampler()

    feature_count = len(features)

    # One row per requested number of features, one column per feature
    selected_features = np.zeros((feature_count, feature_count))

    bqm = mutual_information_bqm(dataset, features, target)

    # Penalty strength chosen so that the solution will satisfy the constraints.
    penalty = maximum_energy_delta(bqm)

    for row, k in enumerate(range(1, feature_count + 1)):
        # Penalize selection of fewer or more than k features
        kbqm = add_combination_penalty(bqm, k, penalty)
        sampleset = sampler.sample(kbqm,
                                   label='Example - MI Feature Selection',
                                   num_reads=num_reads)
        best = sampleset.first.sample
        selected_features[row, :] = [best[f] for f in features]
    return selected_features
    def test_properties(self):
        # With failover enabled, the sampler's cached solver properties must be
        # stable across reads, and must be replaced by new objects after a
        # sample call whose first attempt raises SolverOfflineError.
        sampler = DWaveCliqueSampler(failover=True)

        def fail_once(*args, **kwargs):
            # The 'count' attribute on the function itself remembers whether
            # the one-off failure has already been raised.
            already_failed = getattr(fail_once, 'count', 0)
            if not already_failed:
                fail_once.count = already_failed + 1
                raise SolverOfflineError
            return dimod.SampleSet.from_samples([], energy=0., vartype='SPIN')

        sampler.child.sample = fail_once

        property_names = ('target_graph',
                          'qpu_linear_range',
                          'qpu_quadratic_range')
        cached = {name: getattr(sampler, name) for name in property_names}

        # Before any failure, repeated access returns the very same objects
        for name in property_names:
            self.assertIs(cached[name], getattr(sampler, name))

        sampler.sample_ising({}, {})

        # After the offline error, every property is a different object
        for name in property_names:
            self.assertIsNot(cached[name], getattr(sampler, name))
    def test_default(self):
        # A sampler built with default arguments lets SolverOfflineError from
        # its child sampler propagate to the caller.
        sampler = DWaveCliqueSampler()

        def always_offline(*args, **kwargs):
            raise SolverOfflineError

        sampler.child.sample = always_offline

        self.assertRaises(SolverOfflineError, sampler.sample_ising, {}, {})
    def test_pegasus(self):
        # Smoke test against a Pegasus-topology solver; skipped when the
        # sampler cannot be constructed for one.
        pegasus_filter = dict(topology__type='pegasus')
        try:
            sampler = DWaveCliqueSampler(solver=pegasus_filter)
        except (ValueError, ConfigFileError, SolverNotFoundError):
            raise unittest.SkipTest("no Pegasus-structured QPU available")

        dimod.testing.assert_sampler_api(sampler)

        # Submit a maximum ferromagnet: couple every pair of variables in the
        # largest clique the sampler reports with a bias of -1.
        ferromagnet = dimod.AdjVectorBQM('SPIN')
        for pair in itertools.combinations(sampler.largest_clique(), 2):
            ferromagnet.quadratic[pair] = -1

        # resolve() blocks until the submitted problem has completed
        sampleset = sampler.sample(ferromagnet)
        sampleset.resolve()
    def test_noretry(self):
        # With failover enabled and retry_interval=-1, an offline solver whose
        # failover attempt finds no solver must surface SolverNotFoundError.
        sampler = DWaveCliqueSampler(failover=True, retry_interval=-1)

        def always_offline(*args, **kwargs):
            raise SolverOfflineError

        def no_solver_found(*args, **kwargs):
            raise SolverNotFoundError

        # Every sample attempt fails, and so does the failover it triggers
        sampler.child.sample = always_offline
        sampler.child.trigger_failover = no_solver_found

        self.assertRaises(SolverNotFoundError, sampler.sample_ising, {}, {})
Beispiel #6
0
def get_sampler(topology):
    """Return the shared ``DWaveCliqueSampler`` for ``topology``.

    The sampler is created on first request and stored in ``_SAMPLERS`` so
    later calls with the same ``topology`` reuse it.

    Args:
        topology: Topology name; it is lower-cased to form the solver's
            ``topology__type`` filter and is used verbatim as the cache key.

    Returns:
        The cached or newly created ``DWaveCliqueSampler``.

    Raises:
        unittest.SkipTest: If creating the sampler raises ``ValueError``,
            ``ConfigFileError`` or ``SolverNotFoundError``.
    """
    if topology not in _SAMPLERS:
        try:
            solver_filter = dict(topology__type=topology.lower())
            _SAMPLERS[topology] = DWaveCliqueSampler(solver=solver_filter)
        except (ValueError, ConfigFileError, SolverNotFoundError):
            raise unittest.SkipTest(f"no {topology}-structured QPU available")
    return _SAMPLERS[topology]
def run_demo():
    """Run the Titanic mutual-information feature-selection demo end to end.

    Steps, in order:

    1. Read ``data/formatted_titanic.csv`` from the directory of this file.
    2. Score every column other than ``'survived'`` by its mutual information
       (MI) with ``'survived'`` and draw the scores as a bar chart.
    3. Keep the 8 highest-scoring features and build a binary quadratic model
       from (negated) MI and conditional-MI terms.
    4. For each k from 1 to the number of kept features, add a
       "choose exactly k" penalty, sample the model with
       ``DWaveCliqueSampler`` and record the first (lowest-energy) sample.
    5. Draw the selections as a grid, save both plots to ``plots.png`` next to
       this file and print the saved path.

    Takes no arguments and returns ``None``; its results are the saved image
    and the printed message.
    """
    # Read the feature-engineered data into a pandas dataframe
    # Data obtained from http://biostat.mc.vanderbilt.edu/DataSets
    demo_path = os.path.dirname(os.path.abspath(__file__))
    data_path = os.path.join(demo_path, 'data', 'formatted_titanic.csv')
    dataset = pd.read_csv(data_path)

    # Rank the MI between survival and every other variable
    scores = {}
    features = list(set(dataset.columns).difference(('survived',)))
    for feature in features:
        scores[feature] = mutual_information(prob(dataset[['survived', feature]].values), 0)

    # Split the scores into parallel label/value tuples, highest MI first
    labels, values = zip(*sorted(scores.items(), key=lambda pair: pair[1], reverse=True))

    # Plot the MI between survival and every other variable (left-hand subplot)
    plt.figure()
    ax1 = plt.subplot(1, 2, 1)
    ax1.set_title("Mutual Information")
    ax1.set_ylabel('MI Between Survival and Feature')
    plt.xticks(np.arange(len(labels)), labels, rotation=90)
    plt.bar(np.arange(len(labels)), values)

    # The Titanic dataset provides a familiar, intuitive example available in the public
    # domain. In itself, however, it is not a good fit for solving by sampling. Run naively on
    # this dataset, it finds numerous good solutions but is unlikely to find the exact optimal solution.
    # There are many techniques for reformulating problems for the D-Wave system that can
    # improve performance on various metrics, some of which can help narrow down good solutions
    # to more closely approach an optimal solution.
    # This demo solves the problem for just the highest-scoring features.

    # Select 8 features with the top MI ranking found above.
    keep = 8

    # Restrict the dataset to the top `keep` features plus the target column.
    sorted_scores = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    dataset = dataset[[column[0] for column in sorted_scores[0:keep]] + ["survived"]]
    # NOTE: going through a set means the order of `features` (and so of the
    # columns in the final plot) is not the MI ranking order.
    features = list(set(dataset.columns).difference(('survived',)))

    # Build a QUBO that maximizes MI between survival and a subset of features
    bqm = dimod.BinaryQuadraticModel.empty(dimod.BINARY)

    # Add biases as (negative) MI with survival for each feature
    for feature in features:
        mi = mutual_information(prob(dataset[['survived', feature]].values), 1)
        bqm.add_variable(feature, -mi)

    # Add interactions as (negative) conditional MI with survival for each pair
    # of features. Both column orderings are evaluated and both terms are added
    # to the same interaction.
    for f0, f1 in itertools.combinations(features, 2):
        cmi_01 = conditional_mutual_information(prob(dataset[['survived', f0, f1]].values), 1, 2)
        cmi_10 = conditional_mutual_information(prob(dataset[['survived', f1, f0]].values), 1, 2)
        bqm.add_interaction(f0, f1, -cmi_01)
        bqm.add_interaction(f1, f0, -cmi_10)

    # Set up a QPU sampler with a fully-connected graph of all the variables
    sampler = DWaveCliqueSampler()

    # Row k-1 will hold the selection found when exactly k features are requested
    selected_features = np.zeros((len(features), len(features)))

    # Specify the penalty based on the maximum change in the objective
    # that could occur by flipping a single variable.  This ensures
    # that the ground state will satisfy the constraints.
    penalty = maximum_energy_delta(bqm)

    # For each number of features, k, penalize selection of fewer or more features
    for k in range(1, len(features) + 1):
        # Work on a copy so the penalty for one k does not leak into the next
        kbqm = bqm.copy()
        kbqm.update(dimod.generators.combinations(features, k,
                                                  strength=penalty))  # Determines the penalty

        # Keep only the first (lowest-energy) sample of the 10000 reads
        sample = sampler.sample(kbqm, num_reads=10000).first.sample

        for fi, f in enumerate(features):
            selected_features[k-1, fi] = sample[f]

    # Plot the best feature set per number of selected features (right-hand subplot)
    ax2 = plt.subplot(1, 2, 2)
    ax2.set_title("Best Feature Selection")
    ax2.set_ylabel('Number of Selected Features')
    ax2.set_xticks(np.arange(len(features)))
    ax2.set_xticklabels(features, rotation=90)
    ax2.set_yticks(np.arange(len(features)))
    ax2.set_yticklabels(np.arange(1, len(features)+1))
    # Set a grid on minor ticks
    ax2.set_xticks(np.arange(-0.5, len(features)), minor=True)
    ax2.set_yticks(np.arange(-0.5, len(features)), minor=True)
    ax2.grid(which='minor', color='black')
    # Two-colour map: 0 (not selected) is white, 1 (selected) is red
    ax2.imshow(selected_features, cmap=colors.ListedColormap(['white', 'red']))

    # Save both subplots next to this file and tell the user where they went
    plots_path = os.path.join(demo_path, "plots.png")
    plt.savefig(plots_path, bbox_inches="tight")
    print("Your plots are saved to {}".format(plots_path))
class MockPegasusDWaveSampler(MockDWaveSampler):
    """``MockDWaveSampler`` variant that reports a size-6 Pegasus topology.

    After construction the ``topology`` property reads
    ``{'shape': [6], 'type': 'pegasus'}`` and ``nodelist`` / ``edgelist`` hold
    the nodes and edges of ``dnx.pegasus_graph(6)``.
    """

    def __init__(self, **config):
        # ``config`` is accepted so the class can be called like the sampler
        # it stands in for; it is not forwarded to the base class.
        super().__init__()

        self.properties.update(topology={'shape': [6], 'type': 'pegasus'})

        pegasus = dnx.pegasus_graph(6)
        self.nodelist = list(pegasus.nodes)
        self.edgelist = list(pegasus.edges)


# Build one module-level clique sampler per mocked topology. ``DWaveSampler``
# (as referenced from dwave.system.samplers.clique) is replaced by the mock
# only for the duration of each ``with`` block, i.e. while the
# DWaveCliqueSampler is being constructed.
with unittest.mock.patch('dwave.system.samplers.clique.DWaveSampler',
                         MockChimeraDWaveSampler):
    chimera_sampler = DWaveCliqueSampler()

# Same construction, this time with the Pegasus mock patched in.
with unittest.mock.patch('dwave.system.samplers.clique.DWaveSampler',
                         MockPegasusDWaveSampler):
    pegasus_sampler = DWaveCliqueSampler()


@dimod.testing.load_sampler_bqm_tests(chimera_sampler)
@dimod.testing.load_sampler_bqm_tests(pegasus_sampler)
class TestDWaveCliqueSampler(unittest.TestCase):
    # Tests for DWaveCliqueSampler against the module-level mocked Chimera and
    # Pegasus samplers; the decorators above are applied to the class for each
    # of the two samplers.

    def test_api(self):
        # Both mocked samplers must satisfy dimod's sampler API contract
        for sampler in (chimera_sampler, pegasus_sampler):
            dimod.testing.assert_sampler_api(sampler)

    def test_clique(self):
        # Requesting a clique of size 2 must return exactly two entries
        clique = chimera_sampler.clique(2)
        self.assertEqual(len(clique), 2)
    # NOTE(review): this is the body of an enclosing scope whose header is not
    # visible here; `G`, `rng`, `edge_weights` and `i` are defined outside this
    # fragment.

    # Draw an independent standard-normal coupling for every edge of G
    for e in G.edges():
        #edge_weights[e] = 2*rng.binomial(1, .5)-1
        edge_weights[e] = rng.standard_normal()
    H = {}

    # Build a dense table of couplings: H[n] lists, for every node m, the
    # weight of edge (n, m), or 0 when there is no such edge.
    for n in G.nodes():
        col = []
        for m in G.nodes():
            if (n, m) in G.edges():
                # assumes edge_weights is keyed with the smaller node first — TODO confirm
                col.append(edge_weights[(min(n, m), max(n, m))])
            else:
                col.append(0)
        H[n] = col
 
    # Persist the couplings table for run `i`
    pd.DataFrame.from_dict(H).to_csv("Clique_Gauss_Hamiltonian_{k}".format(k = i))

    # Time the sampling call; the measured interval also includes constructing
    # the DWaveCliqueSampler.
    t_1 = time.time()
    #sampler for sparse graphs
    #sampleset = EmbeddingComposite(DWaveSampler()).sample_ising({}, edge_weights, num_reads = 1000)
    #sampler for clique graphs
    sampleset = DWaveCliqueSampler().sample_ising({}, edge_weights, num_reads = 1000)
    t_2 = time.time()

    # Persist the returned samples for run `i`
    sampleset.to_pandas_dataframe().to_csv("Clique_Gauss_Sampler_Data_{k}".format(k=i))

    # Append this run's elapsed time as a new column of the existing "Time_Data" file.
    # NOTE(review): assigning a one-element list presumably requires Time_Data
    # to contain exactly one row — confirm.
    # NOTE(review): the file is written with its index and read back without
    # index_col, so an extra unnamed index column likely accumulates on every
    # read/write cycle — confirm whether index=False is wanted.
    time_data = pd.read_csv("Time_Data")
    time_data["Clique_Gauss_Hamiltonian_{k}".format(k = i)] = [t_2 - t_1]
    time_data.to_csv("Time_Data")