def send_tfidf_creation_task(project_pk):
    """Build a project's tf-idf matrix and persist it together with its vectorizer.

    Args:
        project_pk: Primary key of the project, forwarded unchanged to the
            ``core.utils.utils_model`` helpers.

    Returns:
        The value returned by ``save_tfidf_matrix`` for the stored matrix.
    """
    # The helpers are imported at call time rather than at module import time.
    from core.utils.utils_model import (
        create_tfidf_matrix,
        save_tfidf_matrix,
        save_tfidf_vectorizer,
    )

    matrix, fitted_vectorizer = create_tfidf_matrix(project_pk)

    # Persist the matrix first, then the vectorizer that produced it.
    saved_matrix = save_tfidf_matrix(matrix, project_pk)
    save_tfidf_vectorizer(fitted_vectorizer, project_pk)

    return saved_matrix
def test_project_gnb_data_tfidf(db, test_profile, tmpdir, settings):
    """Create a "gnb" test project with loaded data and a saved tf-idf matrix.

    The project is populated from ``test_no_labels.csv``; its tf-idf matrix is
    then computed and written under a temporary ``data/tf_idf`` directory,
    which ``settings.TF_IDF_PATH`` is pointed at before saving.

    Args:
        db: Not used directly in the body (presumably requested only to
            enable database access -- confirm against the fixture setup).
        test_profile: Profile passed to ``create_project``.
        tmpdir: Temporary directory object; ``data/tf_idf`` is created in it.
        settings: Settings object whose ``TF_IDF_PATH`` is overridden.

    Returns:
        The created project.
    """
    proj = create_project("test_project", test_profile, classifier="gnb")
    test_data = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    add_data(proj, test_data)

    # Element 0 of create_tfidf_matrix's result is the matrix itself.
    matrix = create_tfidf_matrix(proj.pk)[0]

    # Redirect the tf-idf storage location to a throwaway directory before
    # saving, so the matrix lands in the temporary tree.
    tfidf_dir = tmpdir.mkdir("data").mkdir("tf_idf")
    settings.TF_IDF_PATH = str(tfidf_dir)
    save_tfidf_matrix(matrix, proj.pk)

    return proj
def test_project_svm_data_tfidf(db, test_profile, tmpdir, settings):
    """Create an "svm" test project with loaded data and a saved tf-idf matrix.

    The project is populated from ``test_no_labels.csv``; its tf-idf matrix is
    then computed and written under a temporary ``data/tf_idf`` directory,
    which ``settings.TF_IDF_PATH`` is pointed at before saving.

    Args:
        db: Not used directly in the body (presumably requested only to
            enable database access -- confirm against the fixture setup).
        test_profile: Profile passed to ``create_project``.
        tmpdir: Temporary directory object; ``data/tf_idf`` is created in it.
        settings: Settings object whose ``TF_IDF_PATH`` is overridden.

    Returns:
        The created project.
    """
    proj = create_project("test_project", test_profile, classifier="svm")
    test_data = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    add_data(proj, test_data)

    # Element 0 of create_tfidf_matrix's result is the matrix itself.
    matrix = create_tfidf_matrix(proj.pk)[0]

    # Redirect the tf-idf storage location to a throwaway directory before
    # saving, so the matrix lands in the temporary tree.
    tfidf_dir = tmpdir.mkdir("data").mkdir("tf_idf")
    settings.TF_IDF_PATH = str(tfidf_dir)
    save_tfidf_matrix(matrix, proj.pk)

    return proj
def test_tfidf_vectorizer_labeled(test_project_labeled):
    """Return the tf-idf vectorizer built from the data of test_project_labeled.

    Args:
        test_project_labeled: Project whose primary key is passed to
            ``create_tfidf_matrix``.

    Returns:
        Element 1 of the sequence returned by ``create_tfidf_matrix`` -- the
        vectorizer, as opposed to the matrix at element 0.
    """
    return create_tfidf_matrix(test_project_labeled.pk)[1]
def test_tfidf_matrix(test_project_data):
    """Return the tf-idf matrix created from the data of test_project_data.

    Args:
        test_project_data: Project whose primary key is passed to
            ``create_tfidf_matrix``.

    Returns:
        Element 0 of the sequence returned by ``create_tfidf_matrix`` -- the
        tf-idf matrix (described as CSR-format by the original documentation;
        confirm against ``create_tfidf_matrix``).
    """
    return create_tfidf_matrix(test_project_data.pk)[0]
def test_tfidf_matrix_labeled(test_project_labeled):
    """Return the tf-idf matrix created from the data of test_project_labeled.

    Args:
        test_project_labeled: Project whose primary key is passed to
            ``create_tfidf_matrix``.

    Returns:
        Element 0 of the sequence returned by ``create_tfidf_matrix`` -- the
        tf-idf matrix (described as CSR-format by the original documentation;
        confirm against ``create_tfidf_matrix``).
    """
    return create_tfidf_matrix(test_project_labeled.pk)[0]