def test_init_empty_targets_no_documents(self):
    """targets data file cannot be empty

    Building from "config-empty.yaml" is expected to emit an ERROR-level
    log message containing "data" and to leave the instance not valid.
    """

    with self.assertLogs(level="ERROR") as cm:
        crossmap = Crossmap(join(data_dir, "config-empty.yaml"))
        crossmap.build()
    # assertIn reports both operands on failure; assertTrue(a in b) would
    # only report "False is not true"
    self.assertIn("data", str(cm.output))
    self.assertFalse(crossmap.valid)
def test_init_empty_documents(self):
    """Initializing with an empty dataset file

    Building from "config-empty-documents.yaml" is expected to log at
    WARNING level (or above) a message containing "Number of items" and
    "0", while the instance remains valid.
    """

    with self.assertLogs(level="WARNING") as cm:
        crossmap = Crossmap(join(data_dir, "config-empty-documents.yaml"))
        crossmap.build()
    # assertIn shows the captured log text when a check fails
    logged = str(cm.output)
    self.assertIn("0", logged)
    self.assertIn("Number of items", logged)
    self.assertTrue(crossmap.valid)
def test_target_index_is_saved(self):
    """Build without documents saves one index"""

    instance = Crossmap(config_nodocs)
    instance.build()
    # resolve both index paths up front, then check which ones exist
    index_paths = {
        label: instance.settings.index_file(label)
        for label in ("targets", "documents")
    }
    self.assertTrue(exists(index_paths["targets"]))
    self.assertFalse(exists(index_paths["documents"]))
def test_connect_with_partial_config(self):
    """after an instance is built, can connect to it"""

    # a very minimal configuration (no data fields); data files are
    # explicitly not required when creating the settings
    minimal_settings = CrossmapSettings(
        config_simple_nodata,
        require_data_files=False,
    )
    loaded = Crossmap(minimal_settings)
    loaded.load()
    self.assertEqual(loaded.valid, True)
    self.assertEqual(loaded.default_label, "targets")
def test_get_features_from_custom_file(self):
    """build with one collection, get features from separate data"""

    crossmap = Crossmap(config_advanced_features)
    crossmap.build()
    # the instance should have only one dataset
    self.assertEqual(len(crossmap.db.datasets), 1)
    # its feature map should have items from several data collections
    feature_map = crossmap.indexer.encoder.feature_map
    # from the primary data
    # (assertIn names the missing key on failure, unlike assertTrue(a in b))
    self.assertIn("alice", feature_map)
    # from files defined only via features: data: ...
    self.assertIn("alpha", feature_map)
def setUpClass(cls):
    """Build the shared instance and add three single-document datasets"""

    instance = Crossmap(config_file)
    cls.crossmap = instance
    instance.build()
    # each dataset holds a single document; the item id equals the label
    single_docs = (
        ("af_xz", af_xz),
        ("af_wy_xz", af_wy_xz),
        ("jl", jl),
    )
    for label, doc in single_docs:
        instance.add(label, doc, id=label)
def test_init_from_invalid(self):
    """Initializing with an invalid configuration file

    Constructing from config_noname is expected to emit an ERROR-level
    log message containing "name" and to produce an instance that is not
    valid.
    """

    with self.assertLogs(level="ERROR") as cm:
        crossmap = Crossmap(config_noname)
    # assertIn shows the captured log text when the check fails
    self.assertIn("name", str(cm.output))
    self.assertFalse(crossmap.valid)
class CrossmapRemoveDatasetTests(unittest.TestCase):
    """Tests for removing individual datasets from a crossmap instance"""

    def setUp(self):
        """Build a fresh instance from config_single before every test"""
        instance = Crossmap(CrossmapSettings(config_single, create_dir=True))
        self.crossmap = instance
        instance.build()
        self.assertTrue(self.crossmap)

    def tearDown(self):
        """Discard the files/db created for the "crossmap_single" project"""
        remove_crossmap_cache(data_dir, "crossmap_single")

    def test_remove_dataset(self):
        """removing one dataset disables search in it, keeps the others"""

        instance = self.crossmap
        self.assertTrue(exists(instance.settings.prefix))
        query = {"data": "A B C D"}

        # dataset 'abc' does not exist, so searching should raise exception
        with self.assertRaises(Exception):
            instance.search(query, "abc", n=1)

        # once the dataset is created, the same search gives one hit
        instance.add("abc", query, "abc:1")
        hits_before = instance.search(query, "abc", n=1)
        self.assertEqual(len(hits_before["distances"]), 1)

        # removing dataset 'abc' should make search in 'abc' impossible
        instance.remove("abc")
        with self.assertRaises(InvalidDatasetLabel):
            instance.search(query, "abc", n=1)

        # but most instances files and db should still exist
        hits_targets = instance.search(query, "targets", n=1)
        self.assertEqual(len(hits_targets["distances"]), 1)
        self.assertTrue(exists(instance.settings.prefix))

    def test_remove_all_datasets(self):
        """remove all datasets from an instance"""

        instance = self.crossmap
        self.assertTrue(exists(instance.settings.prefix))
        query = {"data": "A B"}
        hits_before = instance.search(query, "targets", n=1)
        self.assertEqual(len(hits_before["distances"]), 1)

        # removing the last dataset should prevent search ...
        instance.remove("targets")
        with self.assertRaises(InvalidDatasetLabel):
            instance.search(query, "targets", n=1)

        # but some files and dir structure should still exist
        self.assertTrue(exists(instance.settings.prefix))
def setUpClass(cls):
    """Build the shared instance and prepare small query documents"""

    instance = Crossmap(config_file)
    cls.crossmap = instance
    instance.build()
    cls.db = cls.crossmap.indexer.db
    cls.manual_file = cls.crossmap.settings.yaml_file("manual")
    # at start, project only has "targets" and "documents" datasets
    cls.doc_Alice = {"data": "Alice A"}
    cls.doc_A = {"data": "A"}
    cls.doc_B = {"data": "B"}
    cls.doc_AB = {"data": "A B"}
def test_init_from_settings(self):
    """Initializing a crossmap object create directory structure"""

    # settings are created from a directory path,
    # which triggers a search for crossmap.yaml
    dir_settings = CrossmapSettings(data_dir)
    prefix_dir = dir_settings.prefix
    self.assertEqual(prefix_dir, join(data_dir, "crossmap_default"))

    # the data directory is absent before init ...
    self.assertFalse(exists(prefix_dir))
    # ... and present after initializing from the settings object
    instance = Crossmap(dir_settings)
    self.assertTrue(exists(prefix_dir))

    # the instance is not valid because it has not been built yet
    self.assertFalse(instance.valid)
def setUpClass(cls):
    """Build the shared instance; expose similars_docs as cls.targets"""

    instance = Crossmap(config_file)
    cls.crossmap = instance
    instance.build()
    cls.targets = similars_docs
def setUpClass(cls):
    """Build one instance from config_file, shared by all tests"""

    instance = Crossmap(config_file)
    cls.crossmap = instance
    instance.build()
from functools import wraps from json import dumps, loads from crossmap.crossmap import Crossmap from crossmap.settings import CrossmapSettings from crossmap.vectors import sparse_to_dense from os import environ from urllib.parse import unquote from django.http import HttpResponse from logging import info import yaml # load the crossmap object based on configuration saved in an OS variable config_path = environ.get('DJANGO_CROSSMAP_CONFIG_PATH') settings = CrossmapSettings(config_path, require_data_files=False) crossmap = Crossmap(settings) crossmap.load() info("database collections: " + str(crossmap.db._db.list_collection_names())) def get_vector(dataset, item_id): db = crossmap.indexer.db result = db.get_data(dataset, ids=[item_id]) return result[0]["data"] def decr_by_query(a): return -a["query"] def find_vector(item_id, dataset=None):
def setUpClass(cls):
    """Build one instance from config_plain, shared by all tests"""

    instance = Crossmap(config_plain)
    cls.crossmap = instance
    instance.build()
# output as json or tsv
if config.tsv:
    output = tsv_print
else:
    output = json_print

# for build, settings check all data files are available
# for other actions, the settings can be lenient
settings = CrossmapSettings(
    config.config,
    require_data_files=(action == "build"),
)
if not settings.valid:
    sys.exit()

# query-type actions run with logging restricted to errors
if action in {"search", "decompose"}:
    logging.getLogger().setLevel(level=logging.ERROR)

# pick the object that serves the action; the two groups do not overlap,
# and actions in neither group (e.g. "delete") leave crossmap as None
crossmap = None
if action in {"build", "search", "decompose", "add", "remove"}:
    crossmap = Crossmap(settings)
elif action in {
    "features", "diffuse", "distances", "matrix", "counts", "summary",
}:
    crossmap = CrossmapInfo(settings)

# ############################################################################
# actions associated with primary functionality and batch processing

if action == "build":
    crossmap.build()
elif action == "delete":
    remove_db_and_files(settings)
elif action == "remove":
    crossmap.remove(config.dataset)
def setUpClass(cls):
    """Clear the "crossmap_simple" cache, then build a shared instance"""

    # clear first, so the build does not start from earlier output
    remove_crossmap_cache(data_dir, "crossmap_simple")
    instance = Crossmap(config_simple)
    cls.crossmap = instance
    instance.build()
    cls.feature_map_file = cls.crossmap.settings.tsv_file("feature-map")
def setUpClass(cls):
    """Build with Crossmap, then share a CrossmapInfo on the same settings"""

    simple_settings = CrossmapSettings(config_simple)
    # the builder object is only needed to run the build step
    Crossmap(simple_settings).build()
    cls.crossmap = CrossmapInfo(simple_settings)
def setUpClass(cls):
    """Build the shared instance; keep handles to its db and manual file"""

    instance = Crossmap(config_file)
    cls.crossmap = instance
    instance.build()
    cls.db = cls.crossmap.indexer.db
    cls.manual_file = cls.crossmap.settings.yaml_file("manual")
def setUp(self):
    """Build a fresh instance from config_single before every test"""

    instance = Crossmap(CrossmapSettings(config_single, create_dir=True))
    self.crossmap = instance
    instance.build()
    self.assertTrue(self.crossmap)
def setUpClass(cls):
    """Build from config_plain; share the settings, builder and info objects"""

    plain_settings = CrossmapSettings(config_plain)
    cls.settings = plain_settings
    instance = Crossmap(plain_settings)
    cls.crossmap = instance
    instance.build()
    # the info object is created on the same settings, after the build
    cls.crossinfo = CrossmapInfo(plain_settings)