def test_get_labeled_data(setup_celery, test_profile, test_project_labeled,
                          test_queue_labeled, test_irr_queue_labeled,
                          test_admin_queue_labeled, test_redis, tmpdir,
                          settings):
    '''
    Check that get_labeled_data returns two DataFrames whose contents line up
    with the Label and DataLabel rows stored for the project.
    '''
    project = test_project_labeled
    # Lazy queryset: it is only evaluated by the len() call further down.
    expected_labels = Label.objects.filter(project=project)

    fill_queue(
        test_queue_labeled,
        'random',
        test_irr_queue_labeled,
        project.percentage_irr,
        project.batch_size,
    )

    # The util under test hands back (labeled data, labels).
    labeled_data, labels = get_labeled_data(project)

    # Both halves of the result must be DataFrames.
    assert isinstance(labeled_data, pd.DataFrame)
    assert isinstance(labels, pd.DataFrame)

    # Row counts must agree with what the database holds for this project.
    assert len(labels) == len(expected_labels)
    expected_labeled = DataLabel.objects.filter(data__project=project)
    assert len(labeled_data) == len(expected_labeled)

    # Every returned ID must match an upload_id recorded in DataLabel.
    stored_ids = set(expected_labeled.values_list("data__upload_id", flat=True))
    returned_ids = set(labeled_data["ID"].tolist())
    shared_ids = stored_ids & returned_ids
    assert len(shared_ids) == len(labeled_data)
def download_data(request, project_pk):
    """Serve the project's labeled data as a CSV attachment.

    Args:
        request: The POST request
        project_pk: Primary key of the project
    Returns:
        an HttpResponse of type text/csv whose rows are the labeled data
        records, written with the columns ID, Text and Label
    """
    project = Project.objects.get(pk=project_pk)

    # Only the labeled data is exported here; the labels frame is not used.
    labeled_frame, _ = get_labeled_data(project)
    records = labeled_frame.to_dict("records")

    csv_buffer = io.StringIO()
    writer = csv.DictWriter(
        csv_buffer,
        fieldnames=['ID', 'Text', 'Label'],
        quoting=csv.QUOTE_ALL,
    )
    writer.writeheader()
    writer.writerows(records)

    # Rewind so the response reads the buffer from the beginning.
    csv_buffer.seek(0)

    response = HttpResponse(csv_buffer, content_type='text/csv')
    response['Content-Disposition'] = 'attachment;'
    return response
def download_model(request, project_pk):
    """Bundle the project's model files and labeled data into a zip download.

    The archive contains the TF-IDF matrix and vectorizer pickles, the model
    pickle, the static README / Dockerfile / requirements / start script /
    usage notebook from core/data, and two CSV files generated from
    get_labeled_data (the labeled data and the labels).

    The two CSV files are staged as temporary files in settings.DATA_DIR and
    are always removed before returning, including when building the archive
    fails.

    Args:
        request: The POST request
        project_pk: Primary key of the project
    Returns:
        an HttpResponse containing the zip archive
    Raises:
        FileNotFoundError: if one of the files to be archived does not exist
    """
    project = Project.objects.get(pk=project_pk)
    file_prefix = 'project_' + str(project_pk)

    # https://stackoverflow.com/questions/12881294/django-create-a-zip-of-multiple-files-and-make-it-downloadable
    zip_subdir = 'model_project' + str(project_pk)

    tfidf_path = os.path.join(
        settings.TF_IDF_PATH, file_prefix + '_tfidf_matrix.pkl')
    tfidf_vectorizer_path = os.path.join(
        settings.TF_IDF_PATH, file_prefix + '_vectorizer.pkl')

    static_dir = os.path.join(settings.BASE_DIR, 'core', 'data')
    readme_path = os.path.join(static_dir, 'README.pdf')
    dockerfile_path = os.path.join(static_dir, 'Dockerfile')
    requirements_path = os.path.join(static_dir, 'requirements.txt')
    start_script_path = os.path.join(static_dir, 'start_notebook.sh')
    usage_examples_path = os.path.join(static_dir, 'UsageExamples.ipynb')

    current_training_set = project.get_current_training_set()
    # NOTE(review): the model file is looked up under the set number before
    # the current one; presumably the current set has not been trained yet.
    # Confirm against the code that writes the model pickles.
    model_path = os.path.join(
        settings.MODEL_PICKLE_PATH,
        file_prefix + '_training_'
        + str(current_training_set.set_number - 1) + '.pkl')

    data, label_data = get_labeled_data(project)

    archive_buffer = io.BytesIO()
    temp_paths = []
    try:
        # Stage each DataFrame as a CSV on disk so it can be added to the zip
        # by path like every other file.
        for frame in (data, label_data):
            # delete=False keeps the file after the handle closes; the handle
            # is closed before to_csv reopens the path so this also works on
            # platforms that forbid opening a file twice.
            with tempfile.NamedTemporaryFile(mode='w', suffix=".csv",
                                             delete=False,
                                             dir=settings.DATA_DIR) as handle:
                temp_paths.append(handle.name)
            frame.to_csv(handle.name, index=False)
        labeled_data_path, labels_path = temp_paths

        # The staged CSVs have random names; give them stable names in the zip.
        archive_names = {
            labels_path: file_prefix + "_labels.csv",
            labeled_data_path: file_prefix + "_labeled_data.csv",
        }

        with zipfile.ZipFile(archive_buffer, "w") as zip_file:
            for path in [
                tfidf_path, tfidf_vectorizer_path, readme_path, model_path,
                labeled_data_path, labels_path, dockerfile_path,
                requirements_path, start_script_path, usage_examples_path
            ]:
                fname = archive_names.get(path, os.path.basename(path))
                # write the file to the zip folder
                zip_file.write(path, os.path.join(zip_subdir, fname))
    finally:
        # The staged CSVs were created with delete=False, so nothing else
        # cleans them up; remove them whether or not the archive was built.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass

    response = HttpResponse(archive_buffer.getvalue(),
                            content_type="application/x-zip-compressed")
    response['Content-Disposition'] = 'attachment;'
    return response