def run(filename):
    server = request.form.get("host")
    params = request.form.get("params")
    print("params: {}".format(params))
    if server == "SSH":
        ssh = utils.connect_ssh(app.config["rmt_host"], app.config["rmt_port"],
                                app.config["rmt_username"], app.config["rmt_password"])
        utils.upload_file(ssh, os.path.join(app.config["FILE_UPLOADS"], filename), filename)
        command = "cd /itu/s06d03/user_home/isleyen16/ai_tracking/ && python " + filename + " " + params
        stdin, stdout, stderr = ssh.exec_command(command)
        lines = stdout.readlines()
        print(lines)
        ssh.close()
    else:
        file = app.config["FILE_UPLOADS"] + "//" + filename
        Popen('python ' + file + " " + params)
    files[filename].condition = "Invoked"
    uname = platform.uname()
    sys_name = uname.system
    boot_time_timestamp = psutil.boot_time()
    bt = datetime.fromtimestamp(boot_time_timestamp)
    cpu_usage = psutil.cpu_percent()
    freq = psutil.cpu_freq()
    curr_freq = freq.current
    sysInfo = SystemInfo(sys_name, bt, cpu_usage, curr_freq)
    files[filename].sysInfo = sysInfo
    return render_template("public/monitor_results.html", files=files, servers=servers)
def upload_file():
    file_path = "F:/picture/"
    file_path = file_path + readfrom
    with open(file_path, 'w') as f:
        f.write(file_source)
    utils.upload_file(file_path)
def write_messages_to_tsv(files, bucket_name, metadata_file=None):
    """
    Consume the subscription and write results to tsv manifest

    Args:
        files(list(dict)): a list of object file records, e.g.
            {
                "url": "test_url",
                "md5": "test_md5",
                "size": 1
            }
        bucket_name(str): bucket for uploading the manifest to
        metadata_file(str): metadata file for merging
    """
    metadata_info = {}
    # Default filenames without merging
    fields = ["url", "size", "md5"]

    # merge extra metadata info from file
    if metadata_file:
        with open(metadata_file, "rt") as csvfile:
            csvReader = csv.DictReader(csvfile, delimiter="\t")
            # Build a map with url as the key
            for row in csvReader:
                if "url" in row:
                    metadata_info[row["url"]] = {
                        k: v for k, v in row.items() if k != "url"
                    }

    # do merging if possible, and update fields
    need_merge = False
    first_row_need_merge = None
    for row_num, fi in enumerate(files):
        if fi["url"] in metadata_info:
            need_merge = True
            first_row_need_merge = first_row_need_merge or row_num
            for k, v in metadata_info[fi["url"]].items():
                fi[k] = v

    if files and need_merge:
        # add new fields
        [
            fields.append(k)
            for k in files[first_row_need_merge].keys()
            if k not in ["url", "size", "md5"]
        ]

    if len(files) > 0:
        # parse the url
        parts = urlparse(files[0]["url"])

        # generate unique manifest output
        now = datetime.now()
        current_time = now.strftime("%m_%d_%y_%H:%M:%S")
        filename = "manifest_{}_{}.tsv".format(parts.netloc, current_time)

        # write list of object metadata to a file
        utils.write_tsv(filename, files, fields)

        # Upload the file to google bucket
        utils.upload_file(bucket_name, filename, filename)

    logging.info("DONE!!!")
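# A minimal usage sketch for write_messages_to_tsv (assumption: utils.write_tsv and
# utils.upload_file behave as the calls above imply; the bucket name and record below
# are illustrative only, not taken from the source).
example_files = [
    {"url": "gs://example-bucket/object.bam", "size": 1024,
     "md5": "d41d8cd98f00b204e9800998ecf8427e"},
]
write_messages_to_tsv(example_files, "example-manifest-bucket")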
def test_python():
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'pi_with_include.py')
    python_script_url = utils.upload_file(python_script_path)
    py_file_path = os.path.join(THIS_DIR, 'jobs', 'python', 'PySparkTestInclude.py')
    py_file_url = utils.upload_file(py_file_path)
    utils.run_tests(app_url=python_script_url,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=["--py-files", py_file_url])
def update_challenge():
    if request.method == 'POST' and len(request.form) >= 6:
        if logged_in():
            errors = []
            try:
                eid = request.form['eid']
                name = request.form['name']
                category = request.form['category']
                value = int(request.form['value'])
                desc = request.form['description']
            except:
                errors.append("Error: One or more fields missing or incorrect")
            if len(errors) > 0:
                return "Failed to update challenge"
            else:
                chal = Challenge.query.filter_by(cid=request.form['cid']).first()
                if chal:
                    chal.name = name
                    chal.category = category
                    chal.value = value
                    chal.description = desc
                    db.session.commit()
                    cid = chal.cid
                    # try:
                    files = request.files.getlist('file[]')
                    if files and len(files) > 0:
                        for f in files:
                            if f and len(f.filename) > 0:
                                # try:
                                upload_file(f)
                                new_file = File(cid, f.filename)
                                db.session.add(new_file)
                                db.session.commit()
                                # except:
                                #     errors.append("Something went wrong")
                            else:
                                errors.append("Error: something wrong with the file or filename")
                    # except:
                    #     errors.append("No files received")
                    db.session.close()
                    return redirect('/challenge/{}'.format(cid))
                db.session.close()
                return redirect(request.url)
        else:
            flash('Must be logged in to do this action')
            return redirect(url_for('auth.login'))
    else:
        return "Failed to update challenge"
def new_entry():
    if request.method == 'POST':
        if logged_in():
            errors = []
            try:
                entry_type = int(request.form['type'])
            except:
                errors.append("Error: missing type")
                flash("Error: missing type", "error")
                return redirect('/challenge/{}'.format(request.form['cid']))
            chal_id = request.form['cid']
            name = request.form['name']
            if entry_type in range(0, 2):
                desc = request.form['description']
                new_entry = Entry(
                    chal_id,
                    entry_type,
                    name,
                    desc,
                )
            elif entry_type == 2:
                if 'file' in request.files:
                    f = request.files['file']
                    if f and len(f.filename) > 0:
                        try:
                            upload_file(f)
                            new_entry = Entry(chal_id, entry_type, name, None, f.filename)
                        except:
                            errors.append("Something went wrong")
                    else:
                        errors.append("Error: something wrong with the file or filename")
                else:
                    errors.append("No file received")
            else:
                errors.append("Error: invalid type")
            if len(errors) > 0:
                return str(errors)
            else:
                db.session.add(new_entry)
                db.session.commit()
                db.session.close()
                return redirect('/challenge/{}'.format(chal_id))
        else:
            flash('Must be logged in to do this action')
            return redirect(url_for('auth.login'))
    else:
        return "Failed to add entry"
def upload_image():
    new_uuid = '{}{}'.format(uuid.uuid4(), uuid.uuid4())
    try:
        utils.upload_file(request, new_uuid)
    except utils.ValidationError as e:
        return utils.json_response(
            ({
                'message': 'Image Upload Failed : {}'.format(e),
            }), 400)
    return utils.json_response({
        'message': 'Image Uploaded Successfully',
        'uuid': new_uuid,
    })
def baidu_wenku_download(self, msg, userId):
    if not self.class_database_op.get_redis_kv(userId, 'inputing'):
        self.class_database_op.set_redis_kv(userId, 'inputing', 1)
        return '请输入您要下载的百度文库文档地址:'  # "Please enter the Baidu Wenku document URL to download:"
    else:
        self.class_database_op.delete_redis_kv(userId, 'inputing')
        current_url = msg['Content']
        self.class_database_op.step_back(userId)
        current_url = current_url.replace('wk.baidu.com', 'wenku.baidu.com')
        current_url = current_url.replace('www.baidu.com/sf_edu_wenku', 'wenku.baidu.com')
        current_url = current_url.replace('m.baidu.com/sf_edu_wenku', 'wenku.baidu.com')
        if current_url.find('https://wenku.baidu.com') != 0 and current_url.find('http://wenku.baidu.com') != 0:
            return "非百度文库文档地址,无法下载。"  # "Not a Baidu Wenku document URL; cannot download."
        itchatmp.send("正在为您下载该文档,请稍候。", msg['FromUserName'])  # "Downloading the document for you, please wait."
        request_utils_ins = request_utils.request_utils()
        file_info = request_utils_ins.baidu_wenku_download(current_url)
        file_url = utils.upload_file(file_info[0], file_info[1])
        import os
        os.remove(file_info[0])
        # "Please receive the file ~ WeChat does not allow the bot to send files directly;
        # copy the link below into an external browser to download it."
        itchatmp.send("请接收文件~由于微信限制,机器人不能给您直接发送文件,请将以下链接复制到外部浏览器下载~", msg['FromUserName'])
        return file_url
def get_exam_cal(self, msg, userId):
    self.class_database_op.step_back(userId)
    jwxt = self.class_database_op.get_jwxt_info(userId)
    if not jwxt:
        return "您还没有保存您的信息,选3来输入登录信息吧~"  # "You haven't saved your credentials yet; choose 3 to enter your login info."
    itchatmp.send("正在查询,请稍候。。。", msg['FromUserName'])  # "Querying, please wait..."
    request_utils_ins = request_utils.request_utils()
    exams = request_utils_ins.jwxt_get_exam_list(userId)
    print(exams)
    cal = utils.exams_list_to_cal(exams)
    file_name = 'cache/' + str(userId) + '.ics'
    file_show_name = utils.random_string(32) + '.ics'
    with open(file_name, 'wb') as f:
        f.write(cal.to_ical())
    file_url = utils.upload_file(file_name, file_show_name)
    import os
    os.remove(file_name)
    # "Please receive the file ~ WeChat does not allow the bot to send files directly;
    # copy the link below into an external browser to download it."
    itchatmp.send("请接收文件~由于微信限制,机器人不能给您直接发送文件,请将以下链接复制到外部浏览器下载~", msg['FromUserName'])
    return file_url
def test_disconnect_from_master():
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'long_running.py')
    python_script_url = utils.upload_file(python_script_path)
    task_id = utils.submit_job(
        python_script_url,
        "{} {}".format(LONG_RUNNING_FW_NUM_TASKS, LONG_RUNNING_RUN_TIME_SEC),
        ["--conf", "spark.mesos.driver.failoverTimeout=1800",
         "--conf", "spark.cores.max=1"])

    # Wait until executor is running
    utils.wait_for_executors_running(LONG_RUNNING_FW_NAME, LONG_RUNNING_FW_NUM_TASKS)

    # Block the driver's connection to Mesos master
    framework_info = shakedown.get_service(LONG_RUNNING_FW_NAME)
    (driver_host, port) = _parse_fw_pid_host_port(framework_info["pid"])
    _block_master_connection(driver_host, port)

    # The connection will timeout after 15 minutes of inactivity.
    # Add 5 minutes to make sure the master has detected the disconnection.
    # The framework will be considered disconnected => failover_timeout kicks in.
    LOGGER.info("Waiting {} seconds for connection with master to timeout...".format(
        MASTER_CONNECTION_TIMEOUT_SEC))
    time.sleep(MASTER_CONNECTION_TIMEOUT_SEC + 5 * 60)

    # Restore the connection. The driver should reconnect.
    _unblock_master_connection(driver_host)

    # The executor and driver should finish.
    utils.check_job_output(task_id, "Job completed successfully")
def test_r():
    r_script_path = os.path.join(THIS_DIR, 'jobs', 'R', 'dataframe.R')
    r_script_url = utils.upload_file(r_script_path)
    utils.run_tests(app_url=r_script_url,
                    app_args='',
                    expected_output="Justin",
                    app_name="/spark")
def new_challenge():
    if request.method == 'POST' and len(request.form) >= 6:
        if logged_in():
            errors = []
            # try:
            eid = request.form['eid']
            name = request.form['name']
            category = request.form['category']
            value = int(request.form['value'])
            desc = request.form['description']
            # except:
            if len(errors) > 0:
                return "Failed to add challenge [2]"
            else:
                new_chal = Challenge(eid, name, category, desc, value)
                db.session.add(new_chal)
                db.session.commit()
                cid = new_chal.cid
                # try:
                files = request.files.getlist('file[]')
                if files and len(files) > 0:
                    for f in files:
                        if f and len(f.filename) > 0:
                            # try:
                            upload_file(f)
                            new_file = File(cid, f.filename)
                            db.session.add(new_file)
                            db.session.commit()
                            # except:
                            #     errors.append("Something went wrong")
                        else:
                            errors.append("Error: something wrong with the file or filename")
                # except:
                #     errors.append("No files received")
                db.session.close()
                return redirect('/event/{}'.format(eid))
        else:
            flash('Must be logged in to create challenges')
            return redirect(url_for('auth.login'))
    else:
        return "Failed to add challenge"
async def upload(self, ctx, file_name=""):
    """Uploads a file and prints out the download url"""
    if not file_name:
        file_name = ctx.guild.name
    path = (dirs.PDF_EXPORT_DIR / file_name).with_suffix(".pdf")
    url = utils.upload_file(path)
    await ctx.send(url)
def uploadFiles():
    user_id = get_jwt_identity()
    user = User.query.get(user_id)
    teacher = user.teacher
    if (not teacher):
        return "Forbidden", 403
    files = request.files.getlist('files')
    course_id = request.form['id']
    if (not files):
        return 'No files', 400
    added_files = []
    for file in files:
        if file and CourseMaterials.allowed_file(file.filename):
            filename = CourseMaterials.get_filename(secure_filename(file.filename))
            dir = os.path.join('static', 'course', 'materials', course_id)
            if not os.path.exists(dir):
                os.makedirs(dir)
            file_loc = os.path.join(dir, filename)
            file.save(file_loc)
            # upload to API & save record to db
            try:
                utils.upload_file(course_id, file_loc)
                file_db = CourseMaterial(filename=filename, course_id=int(course_id))
                db.session.add(file_db)
                db.session.commit()
                added_files.append(filename)
            except requests.exceptions.RequestException as err:
                os.remove(file_loc)
                db.session.rollback()
    return jsonify({
        'filenames': added_files,
        'status': "not saved" if len(added_files) == 0 else 'success'
    })
def install_vplx(self):
    result = utils.upload_file("vplx", "/tmp", self.conn)
    if result["st"]:
        # cmd_pip = f'pip3 install -r /tmp/vplx/requirements.txt'
        # result_pip = utils.exec_cmd(cmd_pip, self.conn)
        # if not result_pip["st"]:
        #     print("Please install python module on /tmp/requirements.txt")
        return True
def create_predictor():
    # Collecting fields. Target and file are mandatory
    target = request.form.get("target")
    file = request.files.get("file")
    employee_id = request.form.get("employee_id")
    length_of_service = request.form.get("length_of_service")
    age = request.form.get("age")
    job_title = request.form.get("job_title")
    if target is None or file is None:
        abort(400)

    # Save the training dataset
    filename = 'data_train.csv'
    upload_file(file, filename)

    # Train the model
    selected_feats, woe_dicts, clf, scaler, valid_metrics, dict_A5 = pandoras_box.create_predictor(
        filename, target,
        employee_id=employee_id,
        length_of_service=length_of_service,
        age=age,
        job_title=job_title)

    # Save the trained model for future use
    upload_object(selected_feats, "selected_feats.obj")
    upload_object(woe_dicts, "woe_dicts.obj")
    upload_object(clf, "clf.obj")
    upload_object(scaler, "scaler.obj")
    upload_object(valid_metrics, "valid_metrics.obj")
    upload_object(dict_A5, "dict_A5.obj")

    # Response body
    response = jsonify({'Result': 'Model successfully trained'})

    # Managing response CORS
    response.headers.add('Access-Control-Allow-Methods', 'POST, GET, OPTIONS, DELETE, PUT')
    response.headers.add('Access-Control-Max-Age', '5000')
    response.headers.add(
        'Access-Control-Allow-Headers',
        'x-requested-with, Content-Type, Accept-Encoding, Accept-Language, Cookie, Referer')
    response.headers.add('Access-Control-Allow-Credentials', 'true')
    return response
def upload_file(request):
    # if not utils.check_permission(request.user.extra, 'banner_list_index'):
    #     return JsonResponse(NO_PERMISSION)
    if request.method == "POST":
        is_ok, img_url = utils.upload_file(request, 'mysite')
        if is_ok:
            return JsonResponse({"status": 0, "message": "", "url": img_url}, safe=False)
        else:
            return JsonResponse({"status": 1, "message": "上传失败"})  # "Upload failed"
def test_jar(app_name="/spark"):
    master_url = ("https" if utils.is_strict() else "http") + "://leader.mesos:5050"
    spark_job_runner_args = '{} dcos \\"*\\" spark:only 2 --auth-token={}'.format(
        master_url,
        shakedown.dcos_acs_token())
    jar_url = utils.upload_file(os.getenv('TEST_JAR_PATH'))
    utils.run_tests(app_url=jar_url,
                    app_args=spark_job_runner_args,
                    expected_output="All tests passed",
                    app_name=app_name,
                    args=["--class", 'com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'])
def get_group_msg_img(self, uin, info):
    res = self.webqq.get_group_msg_img(uin, info)
    path = tempfile.mktemp()
    fp = open(path, 'wb')
    fp.write(res.read())
    fp.close()
    filename = info.get("name")
    name, typ = filename.split(".")
    name = random.sample(name, 3)
    filename = "{0}.{1}".format("".join(name), typ)
    res = upload_file(filename, path)
    return res.geturl()
def upload_to_s3(file: FileStorage, prefix: str = 'images', bucket: str = BUCKET) -> Tuple[bool, str, str]:
    key, local_file = save_file(file)
    ok, name = upload_file(key=f'{prefix}/{key}', filename=local_file, bucket=bucket)
    if ok:
        os.remove(local_file)
    return ok, name, bucket
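# A minimal usage sketch for upload_to_s3 (assumption: save_file, upload_file and BUCKET
# are defined as the function above implies; the route and the 'image' form field below
# are illustrative only, not taken from the source).
from flask import Flask, request, jsonify

example_app = Flask(__name__)

@example_app.route('/upload', methods=['POST'])
def upload_endpoint():
    ok, name, bucket = upload_to_s3(request.files['image'], prefix='avatars')
    return jsonify({'ok': ok, 'key': name, 'bucket': bucket})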
def get_check_img(self, vcode):
    """Fetch the CAPTCHA image (获取验证图片)."""
    url = "https://ssl.captcha.qq.com/getimage"
    params = [("aid", self.aid), ("r", random.random()), ("uin", self.qid)]
    helper = HttpHelper(url, params, jar=http_sock.cookiejar)
    res = helper.open()
    path = tempfile.mktemp()
    fp = open(path, 'wb')
    fp.write(res.read())
    fp.close()
    res = upload_file("check.jpg", path)
    print res.geturl()
    check_code = None
    while not check_code:
        check_code = raw_input("打开上面连接输出图片上的验证码: ")  # "Open the link above and enter the code shown in the image: "
    return check_code.strip()
def export_to_ical(self, msg, userId):
    self.class_database_op.step_back(userId)
    itchatmp.send("正在查询,请稍候。。。", msg['FromUserName'])  # "Querying, please wait..."
    card = self.class_database_op.get_jwxt_info(userId)
    if not card:
        return "您还没有保存您的信息,选 3 来输入登录信息吧~"  # "You haven't saved your credentials yet; choose 3 to enter your login info."
    request_utils_ins = request_utils.request_utils()
    results = request_utils_ins.jwxt_total_login(userId)
    if not results[0]:
        return "登录教务系统失败!"  # "Failed to log in to the academic affairs system!"
    lessons = request_utils_ins.jwxt_get_lesson_table(userId)
    lessons_list = []
    for key in lessons:
        lesson = lessons[key]
        if lesson['time'] != '':
            time_list = lesson['time'].split(';')
            room_list = lesson['room'].split(';')
            for i in range(0, len(time_list)):
                ret_dict = {}
                ret_dict['name'] = lesson['name']
                ret_dict['teacher'] = lesson['teacher_name']
                key_list = re.compile(r"(.*){(.*)}", flags=re.M).findall(time_list[i])
                ret_dict['time'] = key_list[0][0]
                ret_dict['week'] = key_list[0][1]
                ret_dict['room'] = room_list[i]
                lessons_list.append(ret_dict)
    cal = utils.lessons_list_to_cal(lessons_list)
    file_name = 'cache/' + str(userId) + '.ics'
    file_show_name = utils.random_string(32) + '.ics'
    with open(file_name, 'wb') as f:
        f.write(cal.to_ical())
    file_url = utils.upload_file(file_name, file_show_name)
    import os
    os.remove(file_name)
    # "Please receive the file ~ WeChat does not allow the bot to send files directly;
    # copy the link below into an external browser to download it."
    itchatmp.send("请接收文件~由于微信限制,机器人不能给您直接发送文件,请将以下链接复制到外部浏览器下载~", msg['FromUserName'])
    return file_url
def get_reaction_probabilities(app, fasta_id=None, fasta_file=None):
    session = session_management.get_session_id()
    if session is None:
        return session_management.bad_or_missing_session()
    template = None
    if request.method == GET:
        fasta_id = request.args[FASTA_ID] if fasta_id is None else fasta_id
        # check cache
        if fasta_id is not None and fasta_id != '':
            likelihoods = db.find_probanno(fasta_id)
            if likelihoods is not None:
                job = Job(session, CALCULATE_PROBANNO_JOB, fasta_id, status=COMPLETE)
                # add a record of us calculating this job unless we already have
                if db.retrieve_probanno(session, fasta_id) is None:
                    db.insert_probanno_record(fasta_id, session, likelihoods[2])
                return jsonify(job.to_dict_dto())
        # download fasta file from Uniprot and continue
        try:
            fasta_file = get_fasta_by_id(app, fasta_id) if fasta_file is None else fasta_file
        except probanno.FastaNotFoundError:
            return not_found(fasta_id + " not found")
        template = request.args[TEMPLATE] if TEMPLATE in request.args else DEFAULT_TEMPLATE
    if request.method == PUT:
        fasta_id = request.form[FASTA_ID] if fasta_id is None else fasta_id
        if fasta_id is None:
            return missing_argument(FASTA_ID)
        if FASTA in request.files:
            fasta_file = app.config['UPLOAD_FOLDER'] + utils.upload_file(app, request.files[FASTA])
        else:
            return missing_argument(FASTA_ID)
        template = request.form[TEMPLATE] if TEMPLATE in request.form else DEFAULT_TEMPLATE
    template_file = TEMPLATE_FILES[template] if template in TEMPLATE_FILES else TEMPLATE_FILES[DEFAULT_TEMPLATE]
    gen_id = request.args[FASTA_ID] if FASTA_ID in request.args and request.args[FASTA_ID] is not None else fasta_file
    fasta_name = get_name_from_fasta(fasta_file)
    template_model_file = app.config['MODEL_TEMPLATES'] + template_file
    job = Job(session, CALCULATE_PROBANNO_JOB, fasta_id)
    probanno_queue.enqueue(_async_get_reaction_probabilities, job, fasta_id, fasta_name, session, fasta_file,
                           template_model_file, gen_id, job_id=job.id, timeout=600)
    return jsonify(retrieve_job(job.id).to_dict_dto())
def load_model(app):
    session = session_management.get_session_id()
    if session is None:
        return session_management.bad_or_missing_session()
    if 'file' not in request.files:
        return missing_argument('file')
    file = request.files['file']
    filename = utils.upload_file(app, file, ALLOWED_EXTENSIONS)
    # load model w/ cobra
    model = None
    try:
        model = cobra_modeling.from_json_file(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    except BaseException as e:
        return bad_request("Invalid CobraPy Model. Not in proper cobra.io.json deserializable format")
    os.remove(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    if MODEL_ID not in request.form:
        return missing_argument(MODEL_ID)
    model_id = request.form[MODEL_ID]
    if db.find_model(session, model_id) is not None:
        db.delete_model(session, model_id)
    db.insert_model(session, model_id, cobra_modeling.model_to_json(model))
    return Response()
def main():
    # Read config file
    config = ConfigParser.ConfigParser()
    config.read('process_last_photo.cfg')
    img_dir = config.get('Motion', 'img_dir', "/home/pi/motion")
    server = config.get('Serving', 'server', "localhost:9000")
    bucket_name = config.get('S3', 'bucket', "rpizero-smart-camera-archive")
    user = config.get('Email', 'user')
    pwd = config.get('Email', 'pwd')

    # Find the latest image
    files = os.listdir(img_dir)
    full_paths = [os.path.join(img_dir, basename) for basename in files]
    filename_local = max(full_paths, key=os.path.getctime)

    # Identify objects in the picture using TensorFlow Serving
    classes, scores = query_mobilenet_server(filename_local, server)
    human_detected = False
    print("\n".join(["{0}: {1:.2f}".format(c, s) for (c, s) in zip(classes, scores)]))
    for (c, s) in zip(classes, scores):
        if c == 'person' and s > 0.5:
            human_detected = True
            break
    print("human_detected = {}".format(human_detected))

    # Upload file to S3 and remove the local copy
    url = upload_file(filename_local, bucket_name, human_detected)

    # Send e-mail notification
    if human_detected:
        subject = "Human detected"
        body = "\n".join(["{0}: {1:.2f}".format(c, s) for (c, s) in zip(classes, scores)])
        body += "\n\n{}".format(url)
        send_email(user, pwd, user, subject, body)
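# A hedged sketch of the process_last_photo.cfg file main() above reads. The section and
# option names come from the config.get() calls; the Motion/Serving/S3 values shown are the
# in-code fallbacks, and the Email values are placeholders, not taken from the source.
# [Motion]
# img_dir = /home/pi/motion
# [Serving]
# server = localhost:9000
# [S3]
# bucket = rpizero-smart-camera-archive
# [Email]
# user = example@example.com
# pwd = changeme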
def predict():
    # The csv file is mandatory:
    # if it's missing you have a 'bad request' error
    file = request.files.get("file")
    if file is None:
        abort(400)

    # It is mandatory to have a trained model
    # Without that, you have a 'bad request' error
    if object_exists("selected_feats.obj") is False \
            or object_exists("woe_dicts.obj") is False \
            or object_exists("clf.obj") is False \
            or object_exists("scaler.obj") is False \
            or object_exists("valid_metrics.obj") is False:
        abort(400)

    # Save the test dataset
    employee_id = request.form.get("employee_id")
    length_of_service = request.form.get("length_of_service")
    age = request.form.get("age")
    job_title = request.form.get("job_title")
    filename = 'data_test.csv'
    upload_file(file, filename)

    # Download the trained model
    selected_feats = download_object("selected_feats.obj")
    woe_dicts = download_object("woe_dicts.obj")
    clf = download_object("clf.obj")
    scaler = download_object("scaler.obj")
    valid_metrics = download_object("valid_metrics.obj")

    # Get the predictions
    score, y_hat = pandoras_box.get_prediction(
        filename, selected_feats, woe_dicts, clf, scaler,
        employee_id=employee_id,
        length_of_service=length_of_service,
        age=age,
        job_title=job_title)

    # Save predictions
    upload_object(score, "predict_score.obj")

    # Build the CSV response
    csv = str(employee_id) + ',score\n'
    for index, row in y_hat.iterrows():
        csv += str(row[employee_id]) + "," + str(row['score']) + "\n"
    response = make_response(csv)
    cd = 'attachment; filename=mycsv.csv'
    response.headers['Content-Disposition'] = cd
    response.mimetype = 'text/csv'
    response.headers.add('Access-Control-Allow-Methods', 'POST, GET, OPTIONS, DELETE, PUT')
    response.headers.add('Access-Control-Max-Age', '5000')
    response.headers.add(
        'Access-Control-Allow-Headers',
        'x-requested-with, Content-Type, Accept-Encoding, Accept-Language, Cookie, Referer')
    response.headers.add('Access-Control-Allow-Credentials', 'true')
    return response
from utils import make_tarfile, upload_file, file_exists_in_bucket, purge_backups
import os
import sys
import datetime as dt

log = open("backup.log", "a")
sys.stdout = log

DIRECTORY = os.environ['BACKUP_DIR']
output_path = make_tarfile(DIRECTORY)

print('Starting job @ {}'.format(dt.datetime.now()))
print('Successfully compressed {} into {}'.format(DIRECTORY, output_path))
print('Beginning upload...')
upload_file(output_path)

if not file_exists_in_bucket(output_path):
    print('\nVerification of file upload to s3 failed...exiting')
    exit()

print('\nVerified file upload to s3...Cleaning up')
print('Removing backup archive {}'.format(output_path))
os.remove(output_path)
purge_backups()
def main():
    # Note: It is important to import the libraries needed within the function
    # so Spark does not attempt serializing the libraries to all the workers,
    # otherwise it could fail during Serialization/Deserialization
    # using the pickle methods.
    from mxinfer import load_images
    from mxinfer import predict

    from utils import get_args
    from utils import get_s3client
    from utils import fetch_s3_keys
    from utils import download_objects
    from utils import upload_file

    args = get_args()
    logger.info('received arguments:{}'.format(args))

    conf = SparkConf().setAppName("Distributed Inference using MXNet and Spark")
    # we will set the number of cores per executor to 1 to force Spark to create
    # only one task per executor since MXNet efficiently uses all the cpus on the
    # system for inference
    conf.set('spark.executor.cores', '1')

    sc = pyspark.SparkContext(conf=conf)
    logger.info("Spark Context created")

    s3_client = get_s3client(args['access_key'], args['secret_key'])
    keys = fetch_s3_keys(args['bucket'], args['prefix'], s3_client)

    # filter out only png images.
    # you can also choose to check the content-type headers by doing
    # a head call against each S3-Key
    keys = filter(lambda x: x.endswith('.png'), keys)

    # number of keys
    n_keys = len(keys)
    if n_keys < args['batch']:
        args['batch'] = n_keys

    n_partitions = n_keys // args['batch']
    logger.info('number of keys from s3: {}'.format(n_keys))

    # if keys cannot be divided by args['batch'] .
    if (n_partitions * args['batch'] != n_keys):
        keys.extend(keys[:args['batch'] - (n_keys - n_partitions * args['batch'])])

    logger.debug('Keys:{}'.format(keys))
    n_partitions = len(keys) // args['batch']
    logger.info("number of keys:{}, n_partitions:{}".format(len(keys), n_partitions))

    # we will create partitions of args['batch']
    rdd = sc.parallelize(keys, numSlices=n_partitions)
    logger.info('created rdd with {} partitions'.format(rdd.getNumPartitions()))

    sc.broadcast(args['bucket'])
    rdd = rdd.mapPartitions(lambda k: download_objects(args['bucket'], k))
    rdd = rdd.mapPartitions(load_images)

    sc.broadcast(args)
    rdd = rdd.mapPartitions(lambda imgs: predict(imgs, args))

    output = rdd.collect()

    # drop the extra keys that we added to fill the last batch
    keys = keys[:n_keys]
    output = output[:n_keys]
    logger.info("predictions: {}".format(output))

    if args['output_s3_key'] and args['output_s3_bucket']:
        with open('/tmp/' + args['output_s3_key'], 'w+') as f:
            for k, o in zip(keys, output):
                f.write("Key %s: Prediction: %s\n" % (k, o))
        upload_file(args['output_s3_bucket'], args['output_s3_key'], '/tmp/' + args['output_s3_key'], s3_client)
def upload_repositories_file(repositories_file):
    utils.print_group("Uploading {} file".format(repositories_file))
    utils.upload_file(repositories_file)
def save_user_content():
    if not request.files:
        abort(400, 'No files present!')

    must_have_key_list = ['title', 'description', 'creator_name']
    form = request.form
    keys = form.keys()

    # Check that we have a full language specific set of fields
    must_have_keys = {
        '_en': {'missing': None, 'error': None},
        '_he': {'missing': None, 'error': None}
    }
    for lang in must_have_keys:
        must_have_list = [k + lang for k in must_have_key_list]
        must_have_set = set(must_have_list)
        must_have_keys[lang]['missing'] = list(must_have_set.difference(set(keys)))
        if must_have_keys[lang]['missing']:
            missing_keys = must_have_keys[lang]['missing']
            must_have_keys[lang]['error'] = gen_missing_keys_error(missing_keys)

    if must_have_keys['_en']['missing'] and must_have_keys['_he']['missing']:
        em_base = 'You must provide a full list of keys in English or Hebrew. '
        em = em_base + must_have_keys['_en']['error'] + ' ' + must_have_keys['_he']['error']
        abort(400, em)

    # Set metadata language(s) to the one(s) without missing fields
    md_languages = []
    for lang in must_have_keys:
        if not must_have_keys[lang]['missing']:
            md_languages.append(lang)

    user_oid = current_user.id

    file_obj = request.files['file']
    filename = secure_filename(file_obj.filename)
    metadata = dict(form)
    metadata['user_id'] = str(user_oid)
    metadata['original_filename'] = filename
    metadata['Content-Type'] = mimetypes.guess_type(filename)[0]

    # Pick the first item for all the list fields in the metadata
    clean_md = {}
    for key in metadata:
        if type(metadata[key]) == list:
            clean_md[key] = metadata[key][0]
        else:
            clean_md[key] = metadata[key]

    # Make sure there are no empty keys for at least one of the md_languages
    empty_keys = {'_en': [], '_he': []}
    for lang in md_languages:
        for key in clean_md:
            if key.endswith(lang):
                if not clean_md[key]:
                    empty_keys[lang].append(key)

    # Check for empty keys of the single language with the full list of fields
    if len(md_languages) == 1 and empty_keys[md_languages[0]]:
        abort(400, "'{}' field couldn't be empty".format(empty_keys[md_languages[0]][0]))
    # Check for existence of empty keys in ALL the languages
    elif len(md_languages) > 1:
        if (empty_keys['_en'] and empty_keys['_he']):
            abort(400, "'{}' field couldn't be empty".format(empty_keys[md_languages[0]][0]))

    # Create a version of clean_md with the full fields only
    full_md = {}
    for key in clean_md:
        if clean_md[key]:
            full_md[key] = clean_md[key]

    # Get the magic file info
    file_info_str = magic.from_buffer(file_obj.stream.read())
    if not _validate_filetype(file_info_str):
        abort(415, "File type '{}' is not supported".format(file_info_str))

    # Rewind the file object
    file_obj.stream.seek(0)

    # Convert user specified metadata to BHP6 format
    bhp6_md = _convert_meta_to_bhp6(clean_md, file_info_str)
    bhp6_md['owner'] = str(user_oid)

    # Create a thumbnail and add it to bhp metadata
    try:
        binary_thumbnail = binarize_image(file_obj)
        bhp6_md['thumbnail'] = {}
        bhp6_md['thumbnail']['data'] = urllib.quote(binary_thumbnail.encode('base64'))
    except IOError as e:
        current_app.logger.debug('Thumbnail creation failed for {} with error: {}'.format(
            file_obj.filename, e.message))

    # Add ugc flag to the metadata
    bhp6_md['ugc'] = True

    # Insert the metadata to the ugc collection
    new_ugc = Ugc(bhp6_md)
    new_ugc.save()
    file_oid = new_ugc.id

    bucket = ugc_bucket
    saved_uri = upload_file(file_obj, bucket, file_oid, full_md, make_public=True)
    user_email = current_user.email
    user_name = current_user.name
    if saved_uri:
        console_uri = 'https://console.developers.google.com/m/cloudstorage/b/{}/o/{}'
        http_uri = console_uri.format(bucket, file_oid)
        mjs = get_mjs(user_oid)['mjs']
        if mjs == {}:
            current_app.logger.debug('Creating mjs for user {}'.format(user_email))
        # Add main_image_url for images (UnitType 1)
        if bhp6_md['UnitType'] == 1:
            ugc_image_uri = 'https://storage.googleapis.com/' + saved_uri.split('gs://')[1]
            new_ugc['ugc']['main_image_url'] = ugc_image_uri
            new_ugc.save()
        # Send an email to editor
        subject = 'New UGC submission'
        with open('editors_email_template') as fh:
            template = jinja2.Template(fh.read())
        body = template.render({'uri': http_uri,
                                'metadata': clean_md,
                                'user_email': user_email,
                                'user_name': user_name})
        sent = send_gmail(subject, body, editor_address, message_mode='html')
        if not sent:
            current_app.logger.error('There was an error sending an email to {}'.format(editor_address))
        clean_md['item_page'] = '/item/ugc.{}'.format(str(file_oid))
        return humanify({'md': clean_md})
    else:
        abort(500, 'Failed to save {}'.format(filename))
def setup_module(module):
    if utils.hdfs_enabled():
        utils.require_hdfs()
    utils.require_spark()
    utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"])