Exemple #1
0
    def test_init_empty_targets_no_documents(self):
        """targets data file cannot be empty

        Building from "config-empty.yaml" must emit at least one log record
        at ERROR level (or above) that mentions "data", and the instance
        must end up flagged as not valid.
        """

        with self.assertLogs(level="ERROR") as cm:
            crossmap = Crossmap(join(data_dir, "config-empty.yaml"))
            crossmap.build()
        # assertIn prints both operands on failure; assertTrue(a in b) only
        # reports "False is not true"
        self.assertIn("data", str(cm.output))
        self.assertFalse(crossmap.valid)
Exemple #2
0
    def test_init_empty_documents(self):
        """Initializing with an empty dataset file

        Building from "config-empty-documents.yaml" must log at WARNING level
        (or above) a message containing "Number of items" and "0", while the
        instance itself remains valid.
        """

        with self.assertLogs(level="WARNING") as cm:
            crossmap = Crossmap(join(data_dir, "config-empty-documents.yaml"))
            crossmap.build()
        # assertIn shows the captured log output when the check fails
        logged = str(cm.output)
        self.assertIn("0", logged)
        self.assertIn("Number of items", logged)
        self.assertTrue(crossmap.valid)
Exemple #3
0
    def test_target_index_is_saved(self):
        """A build without documents writes only the targets index file"""

        instance = Crossmap(config_nodocs)
        instance.build()

        # resolve both index paths first, then check what exists on disk
        index_targets = instance.settings.index_file("targets")
        index_documents = instance.settings.index_file("documents")
        self.assertTrue(exists(index_targets))
        self.assertFalse(exists(index_documents))
Exemple #4
0
    def test_connect_with_partial_config(self):
        """An instance can be loaded using settings without data fields"""

        # the configuration is minimal (no data fields), so the settings
        # object is asked not to require data files
        minimal = CrossmapSettings(config_simple_nodata,
                                   require_data_files=False)
        crossmap = Crossmap(minimal)
        crossmap.load()
        self.assertEqual(crossmap.valid, True)
        self.assertEqual(crossmap.default_label, "targets")
Exemple #5
0
    def test_get_features_from_custom_file(self):
        """build with one collection, get features from separate data

        After the build, the database must hold exactly one dataset while
        the encoder's feature map contains features from the primary data
        as well as from the extra feature files.
        """

        crossmap = Crossmap(config_advanced_features)
        crossmap.build()

        # the instance should have only one dataset
        self.assertEqual(len(crossmap.db.datasets), 1)

        # its feature map should have items from several data collections
        feature_map = crossmap.indexer.encoder.feature_map
        # from the primary data
        # (assertIn reports the missing key and the container on failure)
        self.assertIn("alice", feature_map)
        # from files defined only via features: data: ...
        self.assertIn("alpha", feature_map)
 @classmethod
 def setUpClass(cls):
     """Build one crossmap instance for the class and add three datasets.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     cls.crossmap = Crossmap(config_file)
     cls.crossmap.build()
     # add dataset with single documents
     cls.crossmap.add("af_xz", af_xz, id="af_xz")
     cls.crossmap.add("af_wy_xz", af_wy_xz, id="af_wy_xz")
     cls.crossmap.add("jl", jl, id="jl")
Exemple #7
0
    def test_init_from_invalid(self):
        """Initializing with an invalid configuration file

        Constructing an instance from ``config_noname`` must log at ERROR
        level (or above) a message mentioning "name" and yield an instance
        that is not valid.
        """

        with self.assertLogs(level="ERROR") as cm:
            crossmap = Crossmap(config_noname)
        # assertIn shows the captured log output when the check fails
        self.assertIn("name", str(cm.output))
        self.assertFalse(crossmap.valid)
class CrossmapRemoveDatasetTests(unittest.TestCase):
    """Tests for removing individual datasets from a crossmap instance"""
    def setUp(self):
        settings = CrossmapSettings(config_single, create_dir=True)
        self.crossmap = Crossmap(settings)
        self.crossmap.build()
        self.assertTrue(self.crossmap)

    def tearDown(self):
        remove_crossmap_cache(data_dir, "crossmap_single")

    def test_remove_dataset(self):
        """Initializing a crossmap object create directory structure"""

        crossmap = self.crossmap
        self.assertTrue(exists(crossmap.settings.prefix))
        new_doc = dict(data="A B C D")
        # dataset 'abc' does not exist, so searching should raise exception
        with self.assertRaises(Exception):
            crossmap.search(new_doc, "abc", n=1)
        crossmap.add("abc", new_doc, "abc:1")
        result_before = crossmap.search(new_doc, "abc", n=1)
        self.assertEqual(len(result_before["distances"]), 1)
        # removing dataset 'abc' should make search in 'abc' impossible
        crossmap.remove("abc")
        with self.assertRaises(InvalidDatasetLabel):
            crossmap.search(new_doc, "abc", n=1)
        # but most instances files and db should still exist
        result_targets = crossmap.search(new_doc, "targets", n=1)
        self.assertEqual(len(result_targets["distances"]), 1)
        self.assertTrue(exists(crossmap.settings.prefix))

    def test_remove_all_datasets(self):
        """remove all datasets from an instance"""

        crossmap = self.crossmap
        self.assertTrue(exists(crossmap.settings.prefix))
        doc = dict(data="A B")
        result_before = crossmap.search(doc, "targets", n=1)
        self.assertEqual(len(result_before["distances"]), 1)
        # removing the last document should prevent search ...
        crossmap.remove("targets")
        with self.assertRaises(InvalidDatasetLabel):
            crossmap.search(doc, "targets", n=1)
        # but some files and dir structure should still exist
        self.assertTrue(exists(crossmap.settings.prefix))
    @classmethod
    def setUpClass(cls):
        """Build a shared crossmap instance and prepare sample documents.

        unittest invokes setUpClass on the class itself without arguments,
        so it has to be a classmethod to receive ``cls``.
        """
        cls.crossmap = Crossmap(config_file)
        cls.crossmap.build()
        # shortcuts to the database handle and the "manual" yaml file path
        cls.db = cls.crossmap.indexer.db
        cls.manual_file = cls.crossmap.settings.yaml_file("manual")
        # at start, project only has "targets" and "documents" datasets

        # small documents reused across the tests
        cls.doc_Alice = dict(data="Alice A")
        cls.doc_A = dict(data="A")
        cls.doc_B = dict(data="B")
        cls.doc_AB = dict(data="A B")
Exemple #10
0
    def test_init_from_settings(self):
        """Creating an instance from a settings object makes its directory"""

        # settings are created from a directory; per the original note this
        # triggers a search for crossmap.yaml in that directory
        settings = CrossmapSettings(data_dir)
        prefix_dir = settings.prefix
        self.assertEqual(prefix_dir, join(data_dir, "crossmap_default"))
        # the directory is absent before the instance is created ...
        self.assertFalse(exists(prefix_dir))
        instance = Crossmap(settings)
        # ... and present afterwards
        self.assertTrue(exists(prefix_dir))
        # an instance that has not been built yet is not valid
        self.assertFalse(instance.valid)
Exemple #11
0
 @classmethod
 def setUpClass(cls):
     """Build a shared crossmap instance and keep the reference targets.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     cls.crossmap = Crossmap(config_file)
     cls.crossmap.build()
     cls.targets = similars_docs
Exemple #12
0
 @classmethod
 def setUpClass(cls):
     """Build one crossmap instance shared by all tests of the class.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     cls.crossmap = Crossmap(config_file)
     cls.crossmap.build()
Exemple #13
0
from functools import wraps
from json import dumps, loads
from crossmap.crossmap import Crossmap
from crossmap.settings import CrossmapSettings
from crossmap.vectors import sparse_to_dense
from os import environ
from urllib.parse import unquote
from django.http import HttpResponse
from logging import info
import yaml

# Load the crossmap object based on a configuration path saved in the OS
# environment variable DJANGO_CROSSMAP_CONFIG_PATH. These statements run
# once, when the module is imported.
# environ.get() returns None when the variable is not set.
# NOTE(review): how CrossmapSettings treats a None path is not visible
# here - confirm.
config_path = environ.get('DJANGO_CROSSMAP_CONFIG_PATH')
# the settings are created without requiring the data files
settings = CrossmapSettings(config_path, require_data_files=False)
crossmap = Crossmap(settings)
crossmap.load()
# NOTE(review): this reaches into the private attribute crossmap.db._db to
# list the collection names for the log message
info("database collections: " + str(crossmap.db._db.list_collection_names()))


def get_vector(dataset, item_id):
    """Return the "data" field of one item stored in a dataset.

    The item is requested from the indexer database of the module-level
    crossmap instance; the first returned record is used.

    :param dataset: dataset identifier passed to db.get_data
    :param item_id: identifier of the requested item
    :return: the value stored under "data" in the first record
        (indexing the first record fails if nothing is returned)
    """
    records = crossmap.indexer.db.get_data(dataset, ids=[item_id])
    first_record = records[0]
    return first_record["data"]


def decr_by_query(a):
    """Sort key that orders items by decreasing "query" value.

    :param a: mapping with a numeric "query" entry
    :return: the negated "query" value
    """
    query_value = a["query"]
    return -query_value


def find_vector(item_id, dataset=None):
 @classmethod
 def setUpClass(cls):
     """Build one crossmap instance (from config_plain) for the class.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     cls.crossmap = Crossmap(config_plain)
     cls.crossmap.build()
Exemple #15
0
# pick the printer: tab-separated output when requested, json otherwise
if config.tsv:
    output = tsv_print
else:
    output = json_print

# only the build action requires all data files to be available;
# every other action works with lenient settings
settings = CrossmapSettings(
    config.config,
    require_data_files=(action == "build"),
)
if not settings.valid:
    sys.exit()

# search and decompose run with logging restricted to errors
if action in {"search", "decompose"}:
    logging.getLogger().setLevel(level=logging.ERROR)

# create the object that serves the requested action (if any)
crossmap = None
if action in {"build", "search", "decompose", "add", "remove"}:
    crossmap = Crossmap(settings)
elif action in {
        "features", "diffuse", "distances", "matrix", "counts", "summary"
}:
    crossmap = CrossmapInfo(settings)

# ############################################################################
# actions associated with primary functionality and batch processing

if action == "build":
    crossmap.build()
elif action == "delete":
    remove_db_and_files(settings)
elif action == "remove":
    crossmap.remove(config.dataset)
Exemple #16
0
 @classmethod
 def setUpClass(cls):
     """Clear the "crossmap_simple" cache, then build a shared instance.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     remove_crossmap_cache(data_dir, "crossmap_simple")
     cls.crossmap = Crossmap(config_simple)
     cls.crossmap.build()
     # path of the feature-map tsv file, as reported by the settings
     cls.feature_map_file = cls.crossmap.settings.tsv_file("feature-map")
 @classmethod
 def setUpClass(cls):
     """Build an instance, then expose a CrossmapInfo on the same settings.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     settings = CrossmapSettings(config_simple)
     # the build is done through a regular Crossmap object first
     crossmap = Crossmap(settings)
     crossmap.build()
     cls.crossmap = CrossmapInfo(settings)
 @classmethod
 def setUpClass(cls):
     """Build a shared crossmap instance and record handles used by tests.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     cls.crossmap = Crossmap(config_file)
     cls.crossmap.build()
     # shortcuts to the database handle and the "manual" yaml file path
     cls.db = cls.crossmap.indexer.db
     cls.manual_file = cls.crossmap.settings.yaml_file("manual")
 def setUp(self):
     """Build a fresh crossmap instance from config_single for each test."""
     self.crossmap = Crossmap(
         CrossmapSettings(config_single, create_dir=True))
     self.crossmap.build()
     # sanity check that the fixture object is truthy
     self.assertTrue(self.crossmap)
Exemple #20
0
 @classmethod
 def setUpClass(cls):
     """Build an instance and create a CrossmapInfo on the same settings.

     unittest invokes setUpClass on the class itself without arguments, so
     it has to be a classmethod to receive ``cls``.
     """
     cls.settings = CrossmapSettings(config_plain)
     cls.crossmap = Crossmap(cls.settings)
     cls.crossmap.build()
     cls.crossinfo = CrossmapInfo(cls.settings)