def process_task(file_ids_list=None):
    """Call ``process_data`` on every listed file and return the ids visited.

    Args:
        file_ids_list: Optional ids given without ``boilerplate.UPLOAD_PREFIX``.
            When truthy, only ids whose prefixed form appears in the upload
            listing are kept for the read-and-decode step.

    Returns:
        list: every entry of ``boilerplate.list_files(recursive=True)``, in
        listing order, after ``process_data`` has been called on it.
    """
    prefix = boilerplate.UPLOAD_PREFIX
    uploaded = boilerplate.list_files(recursive=True, prefix=prefix)

    if file_ids_list:
        selected = [
            candidate
            for candidate in (prefix + file_id for file_id in file_ids_list)
            if candidate in uploaded
        ]
    else:
        selected = uploaded

    # NOTE(review): this mapping is built (each upload is fetched and decoded
    # as UTF-8) but never read afterwards -- confirm whether the loop below
    # was meant to consume it.
    data_to_process = {}
    for upload_id in selected:
        decoded = boilerplate.get_file(upload_id).decode('utf-8')
        data_to_process[upload_id[len(prefix):]] = decoded

    processed_file_ids = []
    # NOTE(review): the loop walks the full, unfiltered listing, so
    # ``file_ids_list`` does not restrict what gets processed.
    every_file = boilerplate.list_files(recursive=True)
    print('HEYY')
    for stored_id in every_file:
        print('first')
        # The return value of process_data is discarded; nothing is stored
        # back through boilerplate.add_processed_file here.
        process_data(stored_id)
        processed_file_ids.append(stored_id)
    return processed_file_ids
def query_endpoint(file_id=None):
    """Answer a query against the gold data or against one processed file.

    The query type is read from the ``type`` URL argument. For POST requests
    the JSON body is passed on to ``query_data`` as the required tags;
    otherwise ``None`` is passed.

    Args:
        file_id: ``"gold"`` to address the gold data, otherwise a file id
            given without ``boilerplate.PROCESSED_PREFIX``.

    Returns:
        A ``jsonify`` response holding either the query result or an
        ``{"error": ...}`` payload (no query type and no file id, invalid or
        missing ``limit``, or unknown file).
    """
    query_type = request.args.get('type')
    tags_required = request.get_json() if request.method == 'POST' else None

    if file_id is None and query_type is None:
        return jsonify({"error": boilerplate.ERROR_NO_QUERY_TYPE_SPECIFIED})

    if file_id == "gold":
        if query_type == "statistics":
            return jsonify(boilerplate.get_gold_statistics())
        if query_type == "examples":
            try:
                # request.args.get returns None when ``limit`` is absent and
                # int(None) raises TypeError, so both errors must be caught.
                limit = int(request.args.get('limit'))
            except (TypeError, ValueError):
                return jsonify({"error": "wrong limit parameter passed"})
            return jsonify(boilerplate.get_gold_examples(limit))
        # Any other query type runs against the gold text itself.
        processed_file, _ = boilerplate.get_gold("txt")
        text = boilerplate.read_file(processed_file)
    else:
        # NOTE(review): a request that carries a ``type`` but no file id
        # reaches this concatenation with file_id=None -- confirm how that
        # case should be answered.
        processed_file_id = boilerplate.PROCESSED_PREFIX + file_id
        if processed_file_id not in boilerplate.list_files(recursive=True):
            return jsonify({"error": boilerplate.ERROR_NO_SUCH_FILE})
        text = boilerplate.get_file(processed_file_id)

    return jsonify(query_data(query_type, text, tags_required))
def get_file_endpoint(file_id):
    """Return the contents of a listed file, or a JSON error if it is unknown.

    Files whose id starts with ``boilerplate.PROCESSED_PREFIX`` and ends with
    ``.xml`` are sent as ``text/xml``; everything else as ``text/plain``.
    """
    if file_id not in boilerplate.list_files(recursive=True):
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    contents = boilerplate.get_file(file_id)
    is_processed_xml = (
        file_id.startswith(boilerplate.PROCESSED_PREFIX)
        and file_id.endswith('.xml')
    )
    if is_processed_xml:
        return Response(contents, mimetype='text/xml')
    return Response(contents, mimetype='text/plain')
def get_file(file_id):
    """Return a listed file's contents as base64 text.

    Args:
        file_id: Id to look up in ``boilerplate.list_files(recursive=True)``.

    Returns:
        dict: ``{"file_id": ..., "file_contents_base64": ...}``.

    Raises:
        JSONRPCDispatchException: with the "no such file" code when the id is
            not listed, or with the "no file part" code when fetching or
            encoding the contents raises ``TypeError``.
    """
    known_ids = boilerplate.list_files(recursive=True)
    if file_id not in known_ids:
        raise JSONRPCDispatchException(
            code=boilerplate.ERROR_NO_SUCH_FILE_CODE,
            message=boilerplate.ERROR_NO_SUCH_FILE,
        )

    try:
        raw_contents = boilerplate.get_file(file_id)
        encoded = b64encode(raw_contents).decode("utf-8")
    except TypeError:
        raise JSONRPCDispatchException(
            code=boilerplate.ERROR_NO_FILE_PART_CODE,
            message=boilerplate.ERROR_NO_FILE_PART,
        )

    return {"file_id": file_id, "file_contents_base64": encoded}
def do_query(file_id, query_type):
    """Run ``query_data`` over one processed file.

    Args:
        file_id: File id without ``boilerplate.PROCESSED_PREFIX``.
        query_type: Query type forwarded to ``query_data``; must be truthy.

    Returns:
        dict: ``{"result": ...}`` on success, otherwise ``{"error": ...}``
        when the query type is missing or the processed file is not listed.
    """
    if not query_type:
        return {"error": boilerplate.ERROR_NO_QUERY_TYPE_SPECIFIED}

    target_id = boilerplate.PROCESSED_PREFIX + file_id
    if target_id not in boilerplate.list_files(recursive=True):
        return {"error": boilerplate.ERROR_NO_SUCH_FILE}

    # query_data receives a one-entry mapping of file id -> contents.
    documents = {target_id: boilerplate.get_file(target_id)}
    return {"result": query_data(documents, query_type=query_type)}
def query_endpoint(file_id):
    """Run ``query_data`` over one processed file and return JSON.

    The query type comes from the ``type`` URL argument. The response is
    ``{"result": ...}`` on success, or ``{"error": ...}`` when the query type
    is missing or the processed file is not listed.
    """
    query_type = request.args.get('type')
    if not query_type:
        return jsonify({"error": boilerplate.ERROR_NO_QUERY_TYPE_SPECIFIED})

    target_id = boilerplate.PROCESSED_PREFIX + file_id
    if target_id not in boilerplate.list_files(recursive=True):
        return jsonify({"error": boilerplate.ERROR_NO_SUCH_FILE})

    # query_data receives a one-entry mapping of file id -> contents.
    documents = {target_id: boilerplate.get_file(target_id)}
    outcome = query_data(documents, query_type=query_type)
    return jsonify({"result": outcome})
def get_file_endpoint(file_id):
    """Send a listed file, or the gold file, as an attachment.

    A listed id is answered with its contents and a ``Content-Disposition``
    header naming the id. The special id ``"gold"`` is resolved through
    ``boilerplate.get_gold`` using the ``type`` URL argument. Anything else
    yields a JSON error.
    """
    if file_id in boilerplate.list_files(recursive=True):
        response = make_response(boilerplate.get_file(file_id))
        disposition = "attachment; filename=%s" % file_id
        response.headers["Content-Disposition"] = disposition
        return response

    if file_id != "gold":
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    gold_kind = request.args.get('type')
    gold_file, gold_name = boilerplate.get_gold(gold_kind)
    # NOTE(review): 'txt' is not a registered MIME type ('text/plain' is) --
    # confirm what clients expect before changing it.
    return send_file(
        gold_file,
        mimetype='txt',
        attachment_filename=gold_name,
        as_attachment=True,
    )
def load_file_endpoint():
    """Send the file named in a POST body's ``file`` field as a CSV attachment.

    Non-POST requests and ids that are not in
    ``boilerplate.list_files(recursive=True)`` get a JSON error payload.
    """
    if request.method != 'POST':
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    # NOTE(review): the two prints below look like leftover debugging output.
    print('here')
    requested_id = request.json['file']
    print(requested_id)

    if requested_id not in boilerplate.list_files(recursive=True):
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})

    # NOTE(review): the id itself is handed to send_file as the file to send;
    # the contents are not fetched through boilerplate.get_file here --
    # confirm the id is a valid path where this runs.
    return send_file(
        requested_id,
        mimetype='text/csv',
        attachment_filename=requested_id,
        as_attachment=True,
    )
def process_task(file_ids_list=None):
    """Process uploaded files and store every result.

    Args:
        file_ids_list: Optional ids given without ``boilerplate.UPLOAD_PREFIX``.
            When truthy, only those ids whose prefixed form is present in the
            upload listing are processed; otherwise every listed upload is.

    Returns:
        list: the values returned by ``boilerplate.add_processed_file`` for
        each ``(processed_file_id, contents)`` pair yielded by
        ``process_data``, in that order.
    """
    prefix = boilerplate.UPLOAD_PREFIX
    uploaded = boilerplate.list_files(recursive=True, prefix=prefix)

    if file_ids_list:
        selected = [
            candidate
            for candidate in (prefix + file_id for file_id in file_ids_list)
            if candidate in uploaded
        ]
    else:
        selected = uploaded

    # Keys are the ids with the upload prefix stripped off again.
    data_to_process = {}
    for upload_id in selected:
        data_to_process[upload_id[len(prefix):]] = boilerplate.get_file(upload_id)

    return [
        boilerplate.add_processed_file(result_id, result_contents)
        for result_id, result_contents in process_data(data_to_process)
    ]
def process_task(file_ids_list=None):
    """Drop uploads into ``TOMITA_PATH_IN`` and collect XML from ``TOMITA_PATH_OUT``.

    Each selected upload is written to ``TOMITA_PATH_IN``. The function then
    waits on inotify events for ``TOMITA_PATH_OUT`` and stores every
    non-hidden ``.xml`` file that has finished being written. It stops once
    as many results were stored as inputs were written. (Presumably an
    external Tomita process turns inputs into outputs -- that part is not
    visible here.)

    Args:
        file_ids_list: Optional ids given without ``boilerplate.UPLOAD_PREFIX``.
            When truthy, only those ids whose prefixed form is present in the
            upload listing are processed; otherwise every listed upload is.

    Returns:
        list: the names returned by ``boilerplate.add_processed_file`` for the
        stored results; empty when there was nothing to process.
    """
    prefix = boilerplate.UPLOAD_PREFIX
    uploaded = boilerplate.list_files(recursive=True, prefix=prefix)
    if file_ids_list:
        selected = [
            candidate
            for candidate in (prefix + file_id for file_id in file_ids_list)
            if candidate in uploaded
        ]
    else:
        selected = uploaded

    data_to_process = {
        upload_id[len(prefix):]: boilerplate.get_file(upload_id)
        for upload_id in selected
    }
    if not data_to_process:
        # Nothing will ever be written to the output directory, so waiting
        # for events would block indefinitely.
        return []

    # Register the watch before writing any input so output files produced
    # quickly are not missed.
    # NOTE(review): assumes the input and output directories are distinct.
    watcher = inotify.adapters.Inotify()
    watcher.add_watch(TOMITA_PATH_OUT)

    for filename, file_contents in data_to_process.items():
        with open(join(TOMITA_PATH_IN, filename), 'wb') as f:
            f.write(file_contents)

    expected_count = len(data_to_process)
    processed_file_ids = set()
    for (_, type_names, path, out_filename) in watcher.event_gen(yield_nones=False):
        print("PATH=[{}] FILENAME=[{}] EVENT_TYPES={}".format(
            path, out_filename, type_names))

        # Only finished (closed-after-write), non-hidden XML files count.
        if out_filename.startswith('.') or not out_filename.endswith('.xml'):
            continue
        if 'IN_CLOSE_WRITE' not in type_names:
            continue

        with open(join(path, out_filename), 'rb') as f:
            payload = f.read()
        # Use the size of what was actually read rather than a second stat
        # call, so length and contents cannot disagree.
        generated_filename = boilerplate.add_processed_file(
            None, BytesIO(payload), "xml", len(payload))
        processed_file_ids.add(generated_filename)

        if len(processed_file_ids) >= expected_count:
            break

    return list(processed_file_ids)
def list_files_endpoint():
    """Return the recursive file listing as JSON under the ``file_ids`` key."""
    file_ids = boilerplate.list_files(recursive=True)
    return jsonify({'file_ids': file_ids})
def get_file_endpoint(file_id):
    """Return a listed file's contents as-is, or a JSON error if it is unknown."""
    if file_id not in boilerplate.list_files(recursive=True):
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})
    # The value from boilerplate.get_file is returned without wrapping.
    return boilerplate.get_file(file_id)
def list_files():
    """Return the recursive file listing in a dict under the ``file_ids`` key."""
    file_ids = boilerplate.list_files(recursive=True)
    return {'file_ids': file_ids}
def get_file_endpoint(file_id):
    """Return a listed file as ``text/plain``, or a JSON error if it is unknown."""
    if file_id not in boilerplate.list_files(recursive=True):
        return jsonify({'error': boilerplate.ERROR_NO_SUCH_FILE})
    body = boilerplate.get_file(file_id)
    return Response(body, mimetype='text/plain')