def testCosineSimilarityProviderMatching(self):
  """Check correctness for querying the library with a library element.

  Every query is a row taken from the preprocessed library itself, so the
  best achievable cosine similarity is 1.0 and it is reached at the query's
  own row index. A different returned index is acceptable only when its
  similarity ties with that maximum.
  """
  num_examples = 20
  num_trials = 10
  data_dim = 5
  similarity = similarity_lib.CosineSimilarityProvider()
  library = np.float32(np.random.normal(size=(num_examples, data_dim)))
  library = tf.constant(library)
  library = similarity.preprocess_library(library)
  # The row used as the query is chosen at run time via a scalar placeholder;
  # np.newaxis restores a leading axis so the query is a one-row batch.
  query_idx = tf.placeholder(shape=(), dtype=tf.int32)
  query = library[query_idx][np.newaxis, ...]
  (match_idx_op, match_similarity_op, _, _, _) = (
      library_matching._max_similarity_match(library, query, similarity))

  # Use queries that are rows of the library. This means that the maximum
  # cosine similarity is 1.0 and is achieved by the row index of the query
  # in the library.
  with tf.Session() as sess:
    for _ in range(num_trials):
      idx = np.random.randint(0, high=num_examples)
      match_idx, match_similarity = sess.run(
          [match_idx_op, match_similarity_op], feed_dict={query_idx: idx})
      # Fail only if match_idx != idx and the similarity of match_idx is not
      # tied with the argmax (which is 1.0 by construction).
      if match_idx != idx:
        # assertAllClose replaces the non-existent `assertClose`, which would
        # have raised AttributeError instead of performing the comparison.
        self.assertAllClose(match_similarity, 1.0)
def testLibraryMatchingNoPredictions(self):
  """Run library_matching on hardcoded spectra with no predicted library.

  Only observed data is supplied (`predicted=None`, `predictor_fn=None`), and
  the INCHIKEY entries of the returned predictions are compared against the
  expected ids.
  """
  observed_spectra = np.array([[1, 2], [2, 1], [0, 0]], dtype=np.float32)
  query_spectra = np.array([[2, 5], [-3, 1], [0, 0]], dtype=np.float32)
  observed_ids = self.make_ids(3)
  query_ids = self.make_ids(3)

  # Every query and library entry gets the same mass of 1.
  query_data = self._package_data(
      ids=query_ids,
      spectrum=query_spectra,
      masses=np.ones([3, 1], dtype=np.float32))
  observed_data = self._package_data(
      ids=observed_ids,
      spectrum=observed_spectra,
      masses=np.ones([3, 1], dtype=np.float32))

  matching_inputs = library_matching.LibraryMatchingData(
      query=query_data, observed=observed_data, predicted=None)
  cosine_provider = similarity_lib.CosineSimilarityProvider()
  _, predicted_data, _, _ = library_matching.library_matching(
      matching_inputs,
      predictor_fn=None,
      similarity_provider=cosine_provider,
      mass_tolerance=3.0)

  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    predictions = sess.run(predicted_data[fmap_constants.INCHIKEY])

  expected_predictions = ['0', '2', '0']
  self.assertAllEqual(expected_predictions, predictions)
def perform_matching(self, ids_observed, ids_predicted, ids_query,
                     masses_observed, masses_predicted, masses_query,
                     y_observed, y_query, x_predicted, tf_transform,
                     mass_tolerance):
  """Package query/observed/predicted inputs and run library matching.

  Args:
    ids_observed: ids passed to `_package_data` for the observed library.
    ids_predicted: ids passed to `_package_data` for the predicted library.
    ids_query: ids passed to `_package_data` for the queries.
    masses_observed: masses for the observed library.
    masses_predicted: masses for the predicted library.
    masses_query: masses for the queries.
    y_observed: spectra for the observed library.
    y_query: spectra for the queries.
    x_predicted: predictor input, stored under PREDICTOR_INPUT_KEY as a
      `tf.constant`.
    tf_transform: callable applied to the predictor input to produce the
      predicted library entries.
    mass_tolerance: forwarded to `library_matching.library_matching`.

  Returns:
    The result of evaluating `[predicted_data, true_data]` in a session.
  """
  query = self._package_data(
      ids=ids_query, spectrum=y_query, masses=masses_query)

  # The predicted library carries no spectrum; it holds the raw predictor
  # input that predictor_fn consumes instead.
  predicted = self._package_data(
      ids=ids_predicted, spectrum=None, masses=masses_predicted)
  predicted[PREDICTOR_INPUT_KEY] = tf.constant(x_predicted)

  observed = self._package_data(
      ids=ids_observed, spectrum=y_observed, masses=masses_observed)

  matching_inputs = library_matching.LibraryMatchingData(
      query=query, observed=observed, predicted=predicted)

  def predictor_fn(data):
    return tf_transform(data[PREDICTOR_INPUT_KEY])

  cosine_provider = similarity_lib.CosineSimilarityProvider()
  # NOTE(review): the trailing positional 10 is whatever the fifth parameter
  # of library_matching.library_matching is -- confirm against its signature.
  matching_outputs = library_matching.library_matching(
      matching_inputs, predictor_fn, cosine_provider, mass_tolerance, 10)
  true_data, predicted_data, _, _ = matching_outputs

  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    return sess.run([predicted_data, true_data])
def testLibraryMatchingNoObserved(self):
  """Run library_matching on hardcoded values with no observed library.

  Only a predicted library is supplied (`observed=None`); its entries are
  produced by adding 2 to the hardcoded predictor input. The INCHIKEY entries
  of the returned predictions are compared against the expected ids.
  """
  predictor_input = np.array([[1, 1], [-3, -2]], dtype=np.float32)
  query_spectra = np.array([[2, 5], [-3, 1], [0, 0]], dtype=np.float32)
  predicted_ids = self.make_ids(2)
  query_ids = self.make_ids(3)

  query_data = self._package_data(
      ids=query_ids,
      spectrum=query_spectra,
      masses=np.ones([3, 1], dtype=np.float32))

  # The predicted library has no spectrum; it carries the predictor input.
  library_data = self._package_data(
      ids=predicted_ids,
      spectrum=None,
      masses=np.ones([2, 1], dtype=np.float32))
  library_data[PREDICTOR_INPUT_KEY] = tf.constant(predictor_input)

  matching_inputs = library_matching.LibraryMatchingData(
      query=query_data, observed=None, predicted=library_data)

  def predictor_fn(data):
    # Shift the stored predictor input by 2 to obtain the predicted entries.
    return data[PREDICTOR_INPUT_KEY] + 2

  cosine_provider = similarity_lib.CosineSimilarityProvider()
  _, predicted_data, _, _ = library_matching.library_matching(
      matching_inputs,
      predictor_fn=predictor_fn,
      similarity_provider=cosine_provider,
      mass_tolerance=3.0)

  with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    predictions = sess.run(predicted_data[fmap_constants.INCHIKEY])

  expected_predictions = ['0', '1', '0']
  self.assertAllEqual(expected_predictions, predictions)