def _parse_page_param(raw_value, default):
    """Return ``raw_value`` as a non-negative int, or ``default`` if it is not one."""
    try:
        value = int(raw_value)
    except (TypeError, ValueError):
        return default
    return value if value >= 0 else default


def load_all_unlabeled(request):
    """
    Load one page of unlabeled texts from MongoDB for the web client.

    Query parameters (read from ``request.GET``):
        offset: zero-based page index, defaults to 0.
        limit: page size, defaults to 10.
    A value that is not a non-negative integer falls back to its default
    instead of raising.

    :param request: incoming request; only ``request.GET`` is read.
    :return: JsonResponse built from a serialized APIResponse. Its ``data``
        field is a JSON string with ``data`` (the serialized records of the
        requested page) and ``total_count`` (the number of documents
        matching ``{"labeled": False}``).
    """
    offset = _parse_page_param(request.GET.get("offset", 0), 0)
    limit = _parse_page_param(request.GET.get("limit", 10), 10)

    ca = get_mongo_client(uri='mongodb://localhost:27017/')
    collection = ca["annotation_raw_data"]
    query = {"labeled": False}

    # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
    # count_documents() is its replacement and, like the old call, ignores
    # the limit/skip applied to the page cursor below.
    count = collection.count_documents(query)
    unlabeled = collection.find(query).limit(limit).skip(limit * offset)

    # The stored uuid is passed through; an earlier version generated a new
    # uuid.uuid1() for every record here.
    result = [
        AnnotationRawDataSerializer(
            AnnotationRawData(
                text=t.get("text"),
                uuid=t.get("uuid"),
                dataset_uuid=t.get("dataset_uuid"),
            )
        ).data
        for t in unlabeled
    ]

    response = APIResponse()
    response.data = json.dumps({
        'data': result,
        'total_count': count
    })
    response.code = 200
    response.message = "SUCCESS"
    serializer = APIResponseSerializer(response)
    return JsonResponse(serializer.data)
def load_local_dataset(request):
    """
    Load a local unlabeled dataset file into MongoDB.

    The file path is taken from the ``filepath`` parameter: from
    ``request.POST`` for POST requests and from ``request.GET`` otherwise.
    Every line of the file is stripped and inserted into the
    ``annotation_raw_data`` collection as one record with a new
    ``uuid.uuid1()`` identifier.

    :param request: incoming request carrying the ``filepath`` parameter.
    :return: JsonResponse built from a serialized APIResponse; code 200 when
        the file was loaded, code 302 when the path is missing or does not
        exist.
    """
    if request.method == 'POST':
        # request.body is the raw bytes payload and has no .get(); submitted
        # form fields are exposed through request.POST.
        file_path = request.POST.get("filepath")
    else:
        file_path = request.GET.get("filepath")
    print(file_path)

    response = APIResponse()
    # A missing parameter yields None, which os.path.exists() rejects with a
    # TypeError, so treat it the same as a non-existent file.
    # NOTE(review): the path comes straight from the caller, so this endpoint
    # reads any file the server process can access - confirm it is only
    # reachable by trusted users.
    if file_path and os.path.exists(file_path):
        # read file
        ca = get_mongo_client(uri='mongodb://localhost:27017/')
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                text = line.strip()
                text_uuid = uuid.uuid1()
                annotation_raw_data = AnnotationRawData(text=text, uuid=text_uuid)
                annotation_raw_data_serializer = AnnotationRawDataSerializer(annotation_raw_data)
                ca["annotation_raw_data"].insert_one(annotation_raw_data_serializer.data)
        response.data = {"status": "success"}
        response.code = 200
        response.message = "Load SUCCESS"
    else:
        response.data = {"status": "Failed"}
        response.code = 302
        response.message = "the specified file is not exist"

    serializer = APIResponseSerializer(response)
    return JsonResponse(serializer.data)
def upload_remote_file(request):
    """
    Save an uploaded file and import its lines into MongoDB.

    This is the main interface for loading data. On a POST request that
    carries a ``file`` part with a non-empty, allowed name, the upload is
    written to ``UPLOAD_FOLDER`` under the name returned by
    ``secure_filename``, a ``dataset`` record is inserted for it, and every
    stripped line of the saved file is inserted into ``annotation_raw_data``
    tagged with that dataset's uuid.

    :param request: incoming request; ``request.method`` and
        ``request.FILES`` are read.
    :return: JsonResponse built from a serialized APIResponse; code 200 on
        success, otherwise code 302 with a message naming the reason.
    """
    response = APIResponse()
    response.data = {"status": "Failed"}
    response.code = 302

    if request.method != 'POST':
        response.message = "Only support POST function"
    elif 'file' not in request.FILES:
        # the post request has no file part
        response.message = "no file has been upload"
    else:
        uploaded = request.FILES['file']
        print(uploaded.name)
        if uploaded.name == '':
            # when no file is selected the browser still submits an empty
            # part without a filename
            response.message = "file name should not been empty"
        elif not (uploaded and allowed_file(uploaded.name)):
            response.message = "only support 'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif' file"
        else:
            # persist the upload on disk
            safe_name = secure_filename(uploaded.name)
            target_path = os.path.join(UPLOAD_FOLDER, safe_name)
            with open(target_path, 'wb+') as destination:
                for chunk in uploaded.chunks():
                    destination.write(chunk)

            ca = get_mongo_client(uri='mongodb://localhost:27017/')

            # register the data set itself
            data_set_uuid = uuid.uuid1()
            data_set_serializer = DataSetSerializer(
                DataSet(name=uploaded.name, uuid=data_set_uuid))
            ca["dataset"].insert_one(data_set_serializer.data)

            # store one annotation record per line of the saved file
            with open(target_path, 'r', encoding='utf-8') as saved:
                for raw_line in saved:
                    record = AnnotationRawData(
                        text=raw_line.strip(),
                        uuid=uuid.uuid1(),
                        dataset_uuid=data_set_uuid)
                    record_serializer = AnnotationRawDataSerializer(record)
                    ca["annotation_raw_data"].insert_one(record_serializer.data)

            response.data = {"status": "success"}
            response.code = 200
            response.message = "Load SUCCESS"

    api_serializer = APIResponseSerializer(response)
    return JsonResponse(api_serializer.data)
def load_single_unlabeled(request):
    """
    Load one unlabeled text from MongoDB for the web client.

    :param request: incoming request (not inspected).
    :return: JsonResponse built from a serialized APIResponse. With code 200
        the ``data`` field is a JSON string of the serialized record; when no
        document matches ``{"labeled": False}`` the code is 302 and ``data``
        is ``{"status": "Failed"}``.
    """
    ca = get_mongo_client(uri='mongodb://localhost:27017/')
    document = ca["annotation_raw_data"].find_one({"labeled": False})

    response = APIResponse()
    if document is None:
        # find_one() returns None when nothing matches; report that as a
        # failure response instead of failing on None.get().
        response.data = {"status": "Failed"}
        response.code = 302
        response.message = "no unlabeled data found"
    else:
        # dataset_uuid is passed through as load_all_unlabeled does, so both
        # endpoints expose the same fields for a record.
        annotation_data = AnnotationRawData(
            text=document.get("text"),
            uuid=document.get("uuid"),
            dataset_uuid=document.get("dataset_uuid"),
        )
        annotation_data_serializer = AnnotationRawDataSerializer(annotation_data)
        response.data = json.dumps(annotation_data_serializer.data)
        response.code = 200
        response.message = "SUCCESS"

    serializer = APIResponseSerializer(response)
    return JsonResponse(serializer.data)
def create(self, validated_data):
    """
    Build a new `AnnotationRawData` object from the validated data.

    :param validated_data: mapping whose items are passed to the
        `AnnotationRawData` constructor as keyword arguments.
    :return: the newly constructed `AnnotationRawData`.
    """
    instance = AnnotationRawData(**validated_data)
    return instance