def getlog(rid, ltype, offset, perm, disabled4reader):
    """Return log content for record `rid` and log type `ltype`, starting at byte `offset`.

    When `disabled4reader` is None (internal call from job_logs()) the raw log text
    is returned; otherwise a JSON HttpResponse {log, id, status, fsize} is returned.
    Returns a JSON error response if `rid` is not accessible under `perm`.
    """
    print "in getlog(), rid=", rid, ",ltype=", ltype, "offset=", offset
    # access check: try dataset doc first, then fall back to any doc type
    document = _list.get_ds_doc(rid, perm)
    #print "document=",document
    if document is None:
        document = _list.get_doc(rid, perm)
        if document is None:
            return HttpResponse(json.dumps({"error": "data not found"}),
                                content_type="application/json")
    filename = get_log_fname(rid, ltype)
    fsize, logtxt = get_log_content(filename, ltype, offset)
    sts = "n/a"  # default status when no document is available
    if document:
        sts = document.status
    # call from job_logs(), disabled4reader as a flag
    if disabled4reader is None:
        return logtxt
    ret = {"log": logtxt, "id": rid, "status": sts, "fsize": fsize}  #, "linenumb":endln}
    #time.sleep(2)
    #print "++++++++++++++++++++++++++++++++++++++++++++>>>>", ret
    return HttpResponse(json.dumps(ret), content_type="application/json")
def get_optlist(request, rid, perm, disabled4reader):
    """Return the dataset row for `rid` plus its training-option rows as JSON.

    404 if the dataset is not accessible under `perm`.
    NOTE(review): non-GET requests fall through and implicitly return None —
    confirm callers only route GET here.
    """
    print "in _api.get_optlist. rid=", rid
    if request.method == 'GET':
        #doc = Document.objects.get(id=rid)
        doc = _list.get_ds_doc(rid, perm)
        if not doc:
            #print "not found!"
            #return Response({"data not found":-1},status=404)
            return Response({"error": "data not found"}, status=404)
        arr = []
        # get dataset row (the dataset itself is the first row)
        arr.append(get_row_4_opt(doc))
        #print "arr=", arr
        #documents = Document.objects.all().filter(~Q(file_type='predict'),acl_list__lte=perm, train_id=rid).order_by('-id')[0:500]
        #print "before doc"
        # get option rows for this dataset
        documents = _list.get_opt_docs(rid, perm)
        #print "here, len=",len(documents)
        if documents:
            for doc in documents:
                #print "doc=",doc
                arr.append(get_row_4_opt(doc))
        jobj = {}
        jobj["data"] = arr
        #print "jobj=",jobj
        return JsonResponse(jobj, safe=False)
def rm_data(rid, type, perm, disabled4reader):
    """Delete one record: a dataset (`type == "ds"`) or a prediction (`type == "pred"`).

    Returns a DRF Response: {"status": "deleted"} on success, or a 404
    {"status": "failed"} when the record is missing or deletion fails.
    """
    # for deleting dataset record only
    # fix: `document` was previously unbound (NameError) for any other `type` value
    document = None
    if type == "ds":
        document = _list.get_ds_doc(rid, perm)
    elif type == "pred":
        document = _predict.get_pred_doc(rid, perm, disabled4reader)
        # get_pred_doc returns a queryset/list; unwrap the first hit,
        # and treat an empty result as "not found" (previously an empty
        # queryset fell through to .delete() instead of the 404 branch)
        if document is not None and len(document) > 0:
            document = document[0]
        else:
            document = None
    if document is None:
        return Response({"status": "failed", "msg": "record not found"},
                        status=404)
    # should we not really delete record?
    ret = document.delete()  # Django delete() -> (total_deleted, {model: count})
    if ret[0] == 1:
        return Response({"status": "deleted",
                         "msg": "Record id=" + rid + " deleted"})
    else:
        return Response({"status": "failed",
                         "msg": "Delete failed for id=" + rid},
                        status=404)
def get_model(request, rid, perm, disabled4reader):
    """Return model metadata for dataset `rid` as JSON (404 if not accessible).

    Builds a dict from the Document row, then lets ml_util.ml_get_model()
    enrich it with model data from MongoDB.
    """
    print "in get_model, rid=", rid
    # check permission
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return Response({"error": "data not found"}, status=404)
    # get model dict
    local_processed_date = document.local_processed_date()
    ret = {}
    ret["id"] = document.id
    ret["filename"] = document.filename
    ret["file_type"] = document.file_type
    ret["status"] = document.status
    ret["local_processed_date"] = local_processed_date
    ret["ml_n_gram"] = document.ml_n_gram
    ret["ml_lib"] = document.ml_lib
    ret["ml_opts"] = json.loads(document.ml_opts)  # stored as a JSON string
    ret["accuracy"] = document.accuracy
    ret["train_id"] = document.train_id
    ret["option_state"] = document.option_state
    # get other info from mongo
    ret = ml_util.ml_get_model(ret)
    return JsonResponse(ret, safe=False)
def get_log_file(rid, ltype, offset, perm, disabled4reader):
    """Access-check `rid` then delegate log-file retrieval to _log.get_log_file().

    NOTE(review): the not-found path returns a 200 Response with an error body,
    unlike the sibling endpoints which use status=404 — confirm intended.
    """
    print 'in get_log_file(), rid=', rid, ",offset=", offset
    # check access: dataset doc first, then any doc
    document = _list.get_ds_doc(rid, perm)
    #print "document..=",document
    if document is None:
        # check if record exist
        document = _list.get_doc(rid, perm)
        if document is None:
            return Response({"error": "file not found"})
    return _log.get_log_file(rid, ltype, offset, perm, disabled4reader)
# NOTE(review): this function is corrupted — a credential-scrubbing tool replaced a
# span of code with "******" (between the entry print and the not-found Response),
# which appears to have removed the permission check on `rid`. Recover the original
# body from version control before editing; kept verbatim below.
def get_all_predicts(request,rid, perm,disabled4reader,count): print "in _predict.get_all_predicts, rid=",rid," user="******"error":"data not found"},status=404) #use "like" _icontains to get predict and ensemble_predict if count is None or int(count) <=0: predictions = Document.objects.all().filter(Q(file_type__icontains="predict"), train_id=rid).order_by('-id') else: predictions = Document.objects.all().filter(Q(file_type__icontains="predict"), train_id=rid).order_by('-id')[:int(count)] #serializer = PredictSerializer(predictions, many=True) #return Response(serializer.data) return Response(ml_serializers.pred2json(predictions))
def get_pred(rid, perm, disabled4reader):
    """Return one prediction record `rid` as JSON.

    Access is granted only if the prediction's parent dataset (train_id)
    is visible under `perm`; otherwise 404.
    """
    predictions = Document.objects.all().filter(file_type__icontains="predict",
                                                id=rid)
    # robustness fix: an empty queryset previously raised IndexError on [0]
    if len(predictions) == 0:
        return Response({"error": "data not found"}, status=404)
    # check if dataset doc accessible
    train_id = predictions[0].train_id
    ds_doc = _list.get_ds_doc(train_id, perm)
    if not ds_doc:
        return Response({"error": "data not found"}, status=404)
    #serializer = PredictSerializer(predictions, many=True)
    #return Response(serializer.data)
    return Response(ml_serializers.pred2json(predictions))
def train(request, perm, disabled4reader):
    """Kick off a training run for the dataset posted as `hf_w_id`.

    Validates `action` (mllib_api/scikit_api) and dataset access, then
    delegates to _list.ml_opts(); returns {id, msg_id, ret_msg}.
    """
    action = request.POST.get('action')
    rid = request.POST.get('hf_w_id')
    if action is None or not action in ("mllib_api", "scikit_api"):
        return Response({"error": "not supported."}, status=404)
    if rid is None:
        return Response({"error": "id not found"}, status=404)
    # check doc
    document = _list.get_ds_doc(rid, perm)
    #print "document..=",document
    if document is None:
        return Response({"error": "dataset not found"}, status=404)
    rid, msg_id, ret_msg = _list.ml_opts(request, perm, disabled4reader)
    ret = {"id": rid, "msg_id": msg_id, "ret_msg": ret_msg}
    return Response(ret)
def get_feat_impo(request, rid, perm, disabled4reader):
    """Return the feature-importance array for dataset `rid` from MongoDB.

    NOTE(review): the not-found responses use the body {"data not found": -1}
    with status 200, unlike sibling endpoints' {"error": ...} + 404 — confirm
    which contract clients expect before normalizing.
    """
    # chk access
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return Response({"data not found": -1})
    # get data from mongo.dataset_info (key "feature_importance", value only)
    doc = query_mongo.find_one(settings.MONGO_OUT_DNS, settings.MONGO_OUT_PORT,
                               settings.MONGO_OUT_DB, settings.MONGO_OUT_TBL,
                               settings.MONGO_OUT_USR, settings.MONGO_OUT_PWD,
                               '{"rid":' + rid + ',"key":"feature_importance"}',
                               '{"value":1,"_id":0}')
    if doc:
        arr = doc["value"]
        return Response(arr)
    else:
        return Response({"data not found": -1})
def get_ds_info(request, rid, perm, disabled4reader):
    """Return a one-row summary (id, filename, type, status, date, desc) for dataset `rid`.

    404 if not accessible under `perm`.
    NOTE(review): non-GET requests fall through and implicitly return None.
    """
    print "in _api.get_ds_info. rid=", rid
    if request.method == 'GET':
        #doc = Document.objects.get(id=rid)
        doc = _list.get_ds_doc(rid, perm)
        if not doc:
            return Response({"error": "data not found"}, status=404)
        local_processed_date = doc.local_processed_date()
        arr = []
        arr.append([doc.id, doc.filename, doc.file_type, doc.status,
                    local_processed_date, doc.desc])
        ret = {}
        ret["data"] = arr
        return JsonResponse(ret, safe=False)
def calculate_feature_impo(request, rid, perm, disabled4reader):
    """Run the feature-importance job for dataset `rid` and update its status.

    Marks the Document as processing, invokes the feature-importance shell
    script, then re-reads the row and records success ("231") or failure
    ("90231"). Returns the msg_id string (or -1 if never set) for the caller
    (_list) to render the result page.
    """
    print 'In calculate_feature_impo'
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))
    filename = document.filename
    uploadtype = document.file_type
    document.status = 'processing feature importance'
    #document.processed_date=datetime.datetime.now() why failed?
    # NOTE(review): uses datetime.now() here but datetime.datetime.now() in
    # mrun2() — the two call sites evidently import datetime differently.
    document.processed_date = datetime.now()
    document.save()
    ds_id = None
    if document.option_state == "new_training":
        # having featuring output, not depends on source dataset id.
        ds_id = document.train_id
    else:
        ds_id = rid
    # call feature_impo API (executes a shell script)
    ret = invoke_feature_impo(filename, rid, uploadtype, "", ds_id)
    print "feat import ret=", ret
    # update status code: re-read the row in case the job changed it
    document = Document.objects.get(id=rid)
    msg_id = -1
    if ret == 0:
        document.status = 'importance_calculated'
        # only advance the status code, never regress it
        if settings.STS_1000_FEATURE_IMPO > document.status_code:
            document.status_code = settings.STS_1000_FEATURE_IMPO
        msg_id = "231"
    else:
        document.status = 'feature importance failed'
        msg_id = "90231"
    #document.processed_date=datetime.datetime.now()
    document.processed_date = datetime.now()
    document.save()
    print '* end Feature importance: rc=', ret, '; id=', rid
    return msg_id  # _list to handle return page
def feature_impo_combs(request, rid, perm, disabled4reader):
    """Render the combined feature-importance page for dataset `rid`."""
    #print 'in feature_impo_all'
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))
    # a non-empty train_id marks this row as a training option, not a dataset
    is_option = 'N'
    if document.train_id:
        is_option = 'Y'
    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        # prettify for display, e.g. "logistic_regression" -> "Logistic Regression"
        jopts["learning_algorithm"] = jopts["learning_algorithm"].title().replace("_", " ")
    return render(request, 'atdml/feature_combs.html',
                  {'document': document, 'is_option': is_option, "jopts": jopts},)
def set_data(request, type, rid, perm, disabled4reader):
    """Update one field of Document `rid`: its dataset list (ensemble records,
    `type == "_es_list"`) or its DNN state (`type == "dnn_state"`).

    Returns a DRF Response; 404 on unsupported type, missing record, or when
    an _es_list update is attempted on a non-ensemble record.
    """
    # check support types
    if type not in ("_es_list", "dnn_state"):
        return Response({"status": "failed", "msg": "not supported"},
                        status=404)
    # check doc
    document = _list.get_ds_doc(rid, perm)
    if document is None:
        return Response({"status": "failed", "msg": "record not found"},
                        status=404)
    if "ensemble" in document.file_type:
        ds_list = request.POST.get("hf_w_ds_list")
        document.ds_list = ds_list
        document.save()
        return Response({"status": "updated", "id": rid,
                         "msg": "Dataset list updated for Id=" + rid})
    elif type == "dnn_state":
        # fix: was `type in ("dnn_state")` — parens without a comma make a
        # string, so that tested substring membership, not tuple membership
        dnn_state = request.POST.get("dnn_state")
        document.ml_state = dnn_state
        document.save()
        return Response({"status": "updated", "id": rid, "msg": "succeeded"})
    else:
        return Response({"status": "failed", "msg": "not an ensemble record"},
                        status=404)
def job_logs(request, rid, perm, disabled4reader, cid=None):
    """Render the job-logs page for dataset `rid`.

    Collects per-stage log files (named "<rid><stage>.log" in LOG_FOLDER,
    ordered by the pipeline), the prediction records, and any execution logs,
    plus parsed ml/pca options, and renders atdml/joblogs.html.
    `cid`, when given, preselects a prediction id.
    """
    print 'in job_logs(), rid=', rid
    document = _list.get_ds_doc(rid, perm)
    print "document..=", document
    if not document:
        return HttpResponseRedirect(reverse('list'))
    filename = document.filename
    train_id = None
    if cid is None:
        prd_id = request.POST.get("_prd_id")
    else:
        prd_id = cid
        train_id = document.train_id
    #print "prd_id=",prd_id
    #get log files
    dir_str = os.path.join(settings.LOG_FOLDER, rid + "[a-z]*.log")
    # get a list of filenames
    alllist = glob.glob(dir_str)
    # canonical display order of pipeline stages
    pipeline = ['retrieve', 'feature', 'pca', 'train', 'multi_run',
                'feature_importance']
    #print file_list
    content1st = ""
    file_list = []
    prdct_lst = []
    exec_lst = []
    if len(alllist) > 0:
        # remove path and leading rid, leaving just the stage name
        #file_list=[ os.path.basename(f).replace(rid,'').replace('.log','') for f in sorted(alllist) ]
        ava_list = [os.path.basename(f).replace(rid, '').replace('.log', '')
                    for f in alllist]
        # filter file list and keep pipeline order
        for i in pipeline:
            for j in ava_list:
                if i == j:
                    file_list.append(i)
        #print file_list
        # preload the first log's text for the page
        content1st = getlog(rid, file_list[0], 0, perm, None)
    #add predict
    print "document.file_type=", document.file_type
    prdct_doc_lst = Document.objects.all().filter(
        file_type__contains="predict", train_id=rid).order_by('-id')[0:200]
    for i in prdct_doc_lst:
        print i.id, i.filename
    print "prd_id=", prd_id
    if len(prdct_doc_lst) > 0:
        #prdct_lst=[ (str(d.id), d.filename) for d in sorted(prdct_doc_lst,reverse=True) ]
        prdct_lst = [(str(d.id), d.filename) for d in prdct_doc_lst]
        prdct_lst = sorted(prdct_lst, reverse=True)
        # trick to set latest predict_id for negative predict id
        if prd_id and prd_id.startswith('-') and prd_id[1:].isdigit():
            plist = [i[0] for i in prdct_lst]
            prd_id = plist[0]
        #print "plist=",plist
        #print "prd_id2=",prd_id
        file_list.append("predict")
        #print "prdct_lst=",prdct_lst
        #for ensemble: no stage logs, so preload the latest prediction's log
        if len(content1st) == 0:
            content1st = getlog(prdct_lst[0][0], file_list[0], 0, perm, None)
    # find execution log ===========================
    exec_doc_lst = Document.objects.all().filter(
        file_type__contains="predict", train_id=rid,
        desc="has_exe_log").order_by('-id')[0:200]
    if len(exec_doc_lst) > 0:
        exec_lst = [(str(d.id), d.filename) for d in exec_doc_lst]
        exec_lst = sorted(exec_lst, reverse=True)
        file_list.append("execution log")
    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        if "learning_algorithm" in jopts:
            # prettify, e.g. "logistic_regression" -> "Logistic Regression"
            jopts["learning_algorithm"] = jopts["learning_algorithm"].title(
            ).replace("_", " ")
    pca_jopts = document.ml_pca_opts
    if pca_jopts:
        pca_jopts = json.loads(document.ml_pca_opts)
    #print 'exec_lst=',exec_lst
    return render(
        request, 'atdml/joblogs.html', {
            'document': document,
            'file_list': file_list,
            'content1st': content1st,
            'prdct_lst': prdct_lst,
            'exec_lst': exec_lst,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'prd_id': prd_id,
            'train_id': train_id,
            'jopts': jopts,
            'pca_jopts': pca_jopts
            #, 'msg_error':msg_error, 'msg_success': msg_success
        },
    )
def emulate(request, rid, cid, msg_id, perm, disabled4reader, from_api="n"):
    """Handle APK emulation submissions and the AE list page.

    POST: save the uploaded APK as a Document; if a model (hf_train_id) was
    selected, queue a prediction via _predict.invoke_pred_script_by_docs(),
    otherwise run the emulator only via invoke_apk_script(). GET: render the
    page, or return the record's JSON status when from_api == "y".

    NOTE(review): this block was reconstructed from whitespace-mangled source;
    the nesting of a few branches is inferred — verify against version control.
    """
    print 'in emulate, cid=', cid, ", rid=", rid, ",perm=", perm
    document = None
    if not rid is None and len(rid) > 0:
        document = _list.get_ds_doc(rid, perm)
    msg_error = ""
    msg_success = ""
    msg_info = ""
    msg_warning = ""
    new_id = None
    # set message for GET
    if msg_id == "101":
        msg_success = settings.MSG_UPLOAD_SUCCESS + " Id=" + str(cid)
    elif msg_id == "90101":
        msg_error = settings.MSG_UPLOAD_FAILED
    elif msg_id == "90901":
        msg_error = settings.MSG_RECAPTCHA_FAILED
    elif msg_id and "90902" in msg_id:
        arr = msg_id.split('.')
        if len(arr) > 1:
            # append count to the end
            msg_error = settings.MSG_UPLOAD_OVER_MAX + " " + arr[1]
        else:
            msg_error = settings.MSG_UPLOAD_OVER_MAX
    exe_type = None
    recaptcha = settings.RECAPTCHA_PREDICT
    if recaptcha is None:
        recaptcha = "N"
    # Handle file upload
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        if form.is_valid():
            desc = request.POST.get('_desc')
            emulater_config = request.POST.get('_emulater_config')
            train_id = request.POST.get('hf_train_id')
            print "desc=", desc, ",train_id=", train_id, ",perm=", perm
            # assume "<id> <type> <other info>"; append to desc for ref
            # NOTE(review): train_id may be None here — `" " in train_id`
            # would raise TypeError; confirm the form always posts it.
            if " " in train_id:
                tarr = train_id.split(" ")
                train_id = tarr[0]
                exe_type = tarr[1].lower()
                desc = desc + ", by " + train_id + " " + exe_type
            newdoc = Document(docfile=request.FILES['docfile'])
            newdoc.filename = request.FILES['docfile']  #hardcode to remove "upload/"
            newdoc.submitted_by = request.user.username
            newdoc.acl_list = perm
            newdoc.file_type = "emulate"  # for AE page only
            newdoc.desc = desc  # user input + ds info
            if not train_id is None and train_id > "":
                newdoc.status = "apk_queued"  # flag "apk_queued" for prediction job
                newdoc.train_id = train_id  # bind to a ML model for prediction
                #newdoc.file_type="predict" # predict page only
            else:
                newdoc.status = "submitted"  # "submitted" for APK emulator without prediction
                newdoc.train_id = -1  # flag to not be a dataset
                #newdoc.desc="has_exe_log" # flag for execution log
                if not rid is None:
                    newdoc.train_id = rid  # TBD for rid assigned
            newdoc.save()
            new_id = str(newdoc.id)
            realname = os.path.basename(newdoc.docfile.name)
            #dir_indx=realname.index(settings.UPLOAD_DIR)
            print "realname=", realname
            print "UPLOAD_DIR=", settings.UPLOAD_DIR
            print "before Save ========="
            # filename may be different if filename duplicated
            if realname != newdoc.filename:
                newdoc.filename = realname
                newdoc.save()
            print "After Save =========="
            # with prediction, invoke _predict ============
            if not train_id is None and train_id > "":
                mdoc = _list.get_shared_doc(train_id, perm)
                print "mdoc=", mdoc
                action_type = 'upload_predict'
                if exe_type is None:
                    exe_type = "apk-dynamic"
                ml_feat_threshold = None
                # invoke predict script
                (ret, msg_id, msg) = _predict.invoke_pred_script_by_docs( \
                    mdoc, newdoc, action_type, ml_feat_threshold \
                    , exe_type, emulater_config)
                if ret == 0 or ret == 205:
                    msg_success = msg
                else:
                    msg_error = msg
            else:  # emulator only ============
                (ret, msg_id, msg_success, msg_error) = invoke_apk_script(realname, cid=new_id \
                    , emulator_config=emulater_config)
        else:  # invalid form
            # for return only
            form = DocumentForm()
        if from_api == "y":
            if not new_id is None:
                newdoc = _list.get_doc(new_id, perm)
                if not newdoc is None:
                    msg_id = "0"
                    msg = "APK submitted."
                    retj = {
                        "id": new_id,
                        "status": newdoc.status,
                        "by": newdoc.submitted_by,
                        "filename": newdoc.filename,
                        "msg_id": msg_id,
                        "msg": msg
                    }
                    return Response(retj)
                # NOTE(review): newdoc not re-readable -> falls through to the
                # HTML render below even for an API caller — confirm intended.
            else:
                return Response({"error": "submit error!"}, status=404)
        # for ae_list page ==================
        return render(
            request, 'atdml/ae_list.html', {
                'form': form,
                'disabled4reader': disabled4reader,
                'perm': perm,
                'msg_error': msg_error,
                'msg_success': msg_success,
                'msg_info': msg_info,
                'msg_warning': msg_warning,
                'new_id': new_id
                #, 'options': options
                , "use_recaptcha": recaptcha
            },
            #context_instance=RequestContext(request)
        )
    elif request.method == 'GET':  # ========================
        print 'in _emulator.emulate() GET'
        if from_api == "y":
            doc = None
            if not cid is None:
                doc = _list.get_doc(cid, perm)
            if not doc is None:
                msg_id = "0"
                msg = ""
                retj = {
                    "id": cid,
                    "status": doc.status,
                    "by": doc.submitted_by,
                    "filename": doc.filename,
                    "msg_id": msg_id,
                    "msg": msg
                }
                return Response(retj)
            return Response({"error": "record not found"}, status=404)
        else:
            form = DocumentForm()
    else:  # not POST ==============
        # NOTE(review): `form` is not assigned on this path, so the render
        # below would raise NameError — confirm against the original source.
        print 'in _emulator.emulate not post'
    print "msg_error=" + msg_error, ",msg_success=" + msg_success
    # for ae_list page ==================
    #return render_to_response(
    return render(
        request, 'atdml/ae_list.html', {
            'form': form,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'msg_error': msg_error,
            'msg_success': msg_success,
            'msg_info': msg_info,
            'msg_warning': msg_warning,
            'new_id': new_id
            #, 'options': options
            , "use_recaptcha": recaptcha
        },
        #context_instance=RequestContext(request)
    )
def set_feature(request, rid, perm, disabled4reader):
    """Record feature votes for dataset `rid`, then render the feature page.

    POST action 'vote_fid': create or increment a Feature_click vote per
    checked feature (a vote of FILTER_COUNT marks it verified), then refresh
    the combined importance list. POST action 'drop_fid': reset votes to 0.
    Delegates final rendering to feature_impo2() with a result msg_id.
    """
    #print "in set_feature"
    msg_id = None
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))
    filename = document.filename
    uploadtype = document.file_type
    #get files
    ck_list = None
    feat = None
    #print ck_list
    if request.method == 'POST':
        action = request.POST.get('action')
        #print 'in post action=', action
        has_change = 0
        if action == 'vote_fid':
            ck_list = request.POST.getlist('ck_fid')
            to_verified = request.POST.getlist('to_verified')
            #print 'to_verified=',to_verified
            # add a new one
            for idx, fid in enumerate(ck_list):
                feat = None
                try:
                    if not Feature_click.objects.all().filter(fid=fid, rid=rid):
                        # first vote for this feature
                        feat = Feature_click(fid=fid, rid=rid, vote=1)
                        if len(to_verified) == 1 and to_verified[0] == "1":
                            feat.vote = FILTER_COUNT
                    else:
                        # increase vote count
                        feat = Feature_click.objects.get(fid=fid, rid=rid)
                        if len(to_verified) == 1 and to_verified[0] == "1" and feat.vote < FILTER_COUNT:
                            feat.vote = FILTER_COUNT
                        else:
                            feat.vote = feat.vote + 1
                    feat.save()
                    has_change = 1
                except:
                    # NOTE(review): bare except silently drops DB errors per
                    # feature — consider at least logging here.
                    feat = None
                    #error msg?
            # call feature_impo API to refresh combine list
            if has_change == 1:
                ds_id = None
                if document.option_state == "new_featuring":
                    # having featuring output, not depends on source dataset id.
                    ds_id = document.train_id
                ret = invoke_feature_impo(filename, rid, uploadtype, "comb_only", ds_id)
                #check ret?
                msg_id = "232"  # success msg
        # drop feature list item ===========================================
        if action == 'drop_fid':
            ck_list = request.POST.getlist('vf_fid')
            #print ck_list
            for idx, fid in enumerate(ck_list):
                feat = None
                try:
                    if Feature_click.objects.all().filter(fid=fid, rid=rid):
                        feat = Feature_click.objects.get(fid=fid, rid=rid)
                        feat.vote = 0  # reset to 0
                        feat.save()
                except:
                    feat = None
                    #error msg?
            msg_id = "233"  # success msg
    else:  # not POST ==============
        print 'invalid method'
    return feature_impo2(request, rid, perm, disabled4reader, msg_id)
def mrun2(request, rid, filename, msg_id, perm, disabled4reader):
    """Run the "multiple run" task for dataset `rid` and render the result page.

    POST with a new mrun_numb launches multi_run.sh via subprocess and updates
    the Document's status/status_code; repeated numbers are rejected ("90212").
    Ajax requests get a JSON status; otherwise atdml/result.html is rendered.

    NOTE(review): reconstructed from whitespace-mangled source; the nesting of
    the post-run status updates is inferred — verify against version control.
    """
    print "in mrun2()"
    # get perm
    #uname,grp,perm,disabled4reader=get_perm(request)
    #document = Document.objects.get(id=rid)
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('atdml.views.list'))
    mrun_numb = ""
    msg_error = ""
    msg_success = ""
    if msg_id == "211":
        msg_success = settings.MSG_MRUN_SUCCESS
    elif msg_id == "90211":
        msg_error = settings.MSG_MRUN_DUPLICATED
    if request.method == 'POST':
        form = DocumentForm(request.POST, request.FILES)
        action = request.POST.get('action')
        mrun_numb = request.POST.get('mrun_numb')
        print "mrun=", mrun_numb
        print '*** mrun=', action, ' rid=', rid
        # upload to HDFS ======================================================
        if document and (action == 'mrun' or action == 'multiple_run'):
            # ================================= Multi RUN ========
            print 'In action = mrun. document.mrun_numb=', document.mrun_numb
            print '*** document.status_code=', document.status_code
            ret = 0
            # only call task when number are different
            if document.mrun_numb != mrun_numb and mrun_numb:
                #update db
                document.status = 'processing'
                #document.processed_date=datetime.datetime.now()
                document.mrun_numb = mrun_numb
                document.save()
                #execute shell script here
                uploadtype = document.file_type
                ml_lib = document.ml_lib
                opt_jstr = document.ml_opts
                #print "in _result.py: settings.MRUN_SCRIPT=",settings.MRUN_SCRIPT
                ret = subprocess.call([
                    settings.TASK_EXE,     #bash
                    settings.MRUN_SCRIPT,  #multi_run.sh
                    rid,
                    filename,
                    mrun_numb,
                    uploadtype,
                    ml_lib,
                    opt_jstr,
                ])
                '''
                child=subprocess.Popen([settings.TASK_EXE, settings.MRUN_SCRIPT,
                    settings.SPARK_SUBMIT, settings.HDFS_UPLOAD_DIR+"/"+filename,
                    settings.TRAIN_DES_DIR+"/"+filename, rid, filename, mrun_numb ])
                ret=child.returncode
                '''
                # refresh document (the task may have changed it)
                document = Document.objects.get(id=rid)
                if ret == 0:
                    # only advance the status code, never regress it
                    if settings.STS_800_MRUN > document.status_code:
                        document.status = 'mruned'
                        document.status_code = settings.STS_800_MRUN
                        print '*** updated document.status_code=', document.status_code
                    document.processed_date = datetime.datetime.now()
                    document.save()
                    print "after mrun subproc. ret=", ret
                    msg_id = "211"
                else:
                    msg_id = "90211"
            else:  # repeated
                print "mrun repeated"
                msg_id = "90212"
            print '* end mRun: rc=', ret, '; id=', rid, ', fname=', filename
            #return HttpResponseRedirect('/atdml/'+str(rid)+'/f/mrun/'+msg_id+'/')
        else:  # Invalid status or action
            print '*** Invalid status or action! id=', rid, ', fname=', filename
    else:  # Not POST =========
        form = DocumentForm()  # A empty, unbound form
    # Load documents for the list page
    document = Document.objects.get(id=rid)
    predictions = Document.objects.all().filter(
        file_type="predict", train_id=rid).order_by('-id')[0:10]
    # get train option id
    train_id = document.train_id
    # get sample file list
    sflist = _predict.get_sfile_list(document.filename, document.id,
                                     document.file_type, train_id)  # how to get dir?
    cv_grid_data, param_str, jopts = get_cv_grid(document, rid)
    if request.is_ajax():
        print "Ajax Mrun"
        #sdoc = serializers.serializer('json', [document])
        #print "sdoc="+sdoc
        document = Document.objects.get(id=rid)
        ret_msg = ""
        if msg_id == "211":
            ret_msg = settings.MSG_MRUN_SUCCESS
            ret_data = {
                "status": document.status,
                "id": rid,
                "pdate": document.local_processed_date(),
                "by": document.submitted_by,
                "vari": document.variance_short(),
                "mean": document.mean_short(),
                "msg": ret_msg + " Id=" + rid,
                "src": mrun_numb
            }
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json")
        elif msg_id == "90211":  # failed
            ret_msg = settings.MSG_MRUN_FAILED
            ret_data = {"msg": ret_msg + " Id=" + rid}
            print json.dumps(ret_data)
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json",
                                status=400)
        elif msg_id == "90212":  # duplicated
            ret_msg = settings.MSG_MRUN_DUPLICATED
            ret_data = {"msg": ret_msg + " Id=" + rid}
            print json.dumps(ret_data)
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json",
                                status=400)
    #time.sleep(2)
    has_roc = has_result_file(rid, str(rid) + "_roc.json")
    has_mrun = has_result_file(rid, str(rid) + "_mrun.json")
    has_score = has_result_file(rid, str(rid) + "_score_graph.json")
    print "has_roc=", has_roc, ", has_mrun=", has_mrun, ", has_score=", has_score
    return render(
        request, 'atdml/result.html', {
            'document': document,
            'form': form,
            'predictions': predictions,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'msg_error': msg_error,
            'msg_success': msg_success,
            'sflist': sflist,
            "cv_grid_data": cv_grid_data,
            "param_str": param_str,
            "jopts": jopts,
            "has_roc": has_roc,
            "has_mrun": has_mrun,
            "has_score": has_score
        },
        #context_instance=RequestContext(request)
    )
def _feat_rows_to_html(flist, tooltip_only_if_long=False):
    """Convert tab-separated score lines into HTML `<td>` row strings.

    Column 0 gets its value as a CSS class, column 2 gets a tooltip cell
    (always, or only when longer than MAX_DISPLAY_LEN if
    `tooltip_only_if_long`); other columns become plain cells. At most
    LIST_COUNT rows are produced.
    """
    outlist = []
    for line in flist[:LIST_COUNT]:
        out = ""
        for idx, item in enumerate(line.split('\t')):
            # escape < and > for html
            # (fix: the previous replace("<", "<") was a no-op — the entity
            # text had been lost, so feature text was injected unescaped)
            item = item.replace("<", "&lt;").replace(">", "&gt;")
            if idx == 2 and (not tooltip_only_if_long
                             or len(item) > MAX_DISPLAY_LEN):
                out += '<td data-placement="bottom" data-toggle="tooltip" ' \
                       + ' title="' + item + '">' + show_partial(item) + '</td>'
            elif idx == 0:
                out += '<td class="' + item + '">' + item + '</td>'
            else:
                out += '<td>' + item + '</td>'
        outlist.append(out)
    return outlist

def feature_impo_all(request, rid, perm, disabled4reader):
    """Render the all-features importance page (FIRM / PROB / IT score lists).

    Reads the three score files produced by the feature-importance job and
    formats each into HTML table rows via _feat_rows_to_html().
    """
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))
    filename = document.filename
    # score files produced by the feature-importance job
    out_FIRM = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_FIRM.txt"
    out_PROB = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_PROB.txt"
    out_IT = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_IT.txt"
    flist1, flist2, flist3 = get_feat_importance(rid, out_FIRM, out_PROB, out_IT)
    # the three loops differed only in the tooltip condition for the IT list
    outlist1 = _feat_rows_to_html(flist1)
    outlist2 = _feat_rows_to_html(flist2)
    outlist3 = _feat_rows_to_html(flist3, tooltip_only_if_long=True)
    # a non-empty train_id marks this row as a training option
    is_option = 'N'
    if document.train_id:
        is_option = 'Y'
    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        jopts["learning_algorithm"] = jopts["learning_algorithm"].title().replace("_", " ")
    return render(request, 'atdml/feature_all.html',
                  {'document': document, 'flist1': outlist1,
                   'flist2': outlist2, 'flist3': outlist3,
                   'is_option': is_option, "jopts": jopts},)
def feature_impo2(request, rid, perm, disabled4reader, msg_id):
    """Render the feature page: verified feature votes plus the combined score list.

    `msg_id` selects the success/error banner ("231"/"232"/"233" success,
    >= "90000" failure). Reads the combined score file
    "<rid>_score_combine.txt"; its absence sets no_feature="Y".
    """
    msg_success = ""
    msg_error = ""
    # msg_id for message after POST and avoid re-POST
    # (guard added: msg_id may be None; in Py2 None >= "90000" was False, so
    # behavior is unchanged, but the explicit check also survives Py3)
    if msg_id == "231":
        msg_success = settings.MSG_FEATURE_IMPO_SUCCESS + " Id=" + str(rid)
    elif msg_id == "232":
        msg_success = settings.MSG_FEATURE_SET_SUCCESS + " Id=" + str(rid)
    elif msg_id == "233":
        msg_success = settings.MSG_FEATURE_DROP_SUCCESS + " Id=" + str(rid)
    elif msg_id is not None and msg_id >= "90000":
        msg_error = settings.MSG_FEATURE_IMPO_FAILED + " Id=" + str(rid)
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('list'))
    filename = None
    if document:
        filename = document.filename
    if not filename:
        msg_error = "Dataset not found! id=" + rid
        msg_id = "90000"
    # get verified features from db (vote >= FILTER_COUNT means verified)
    try:
        vflist = Feature_click.objects.all().filter(
            rid=rid, vote__gte=FILTER_COUNT).order_by('-vote')[:LIST_COUNT]
    except:
        vflist = []
    # get combined features from file
    out_COMB = settings.RESULT_DIR_FULL + "/" + str(rid) + "/" + str(rid) + "_score_combine.txt"
    lines = []
    outlist1 = []
    items = []
    no_feature = "N"
    try:
        with open(out_COMB, 'r') as f:
            lines = f.read().splitlines()
    except:
        no_feature = "Y"
    if len(lines) >= LIST_COUNT:
        lines = lines[:LIST_COUNT]
    # build table cells: checkbox, fid, score, desc (tooltip), rank
    for idx, line in enumerate(lines):
        items = line.split('\t')
        line = '<td ><input type="checkbox" class="checkbox" name="ck_fid" value="' + items[0] + '"></input></td>' \
               + '<td>' + items[0] + '</td><td>' + items[1] + '</td><td data-placement="bottom" data-toggle="tooltip" ' \
               + ' title="'
        # escape < and > for html
        # (fix: the previous replace("<", "<") was a no-op — the entity text
        # had been lost, so feature text was injected unescaped)
        items[2] = items[2].replace("<", "&lt;").replace(">", "&gt;")
        if len(items[2]) > MAX_DISPLAY_LEN:
            line += items[2] + '">' + show_partial(items[2])
        else:
            line += '">' + items[2]
        line += '</td><td>' + str(idx + 1) + '</td>'
        outlist1.append(line)
    jopts = document.ml_opts
    if jopts:
        jopts = json.loads(document.ml_opts)
        jopts["learning_algorithm"] = jopts["learning_algorithm"].title().replace("_", " ")
    return render(request, 'atdml/feature.html',
                  {'document': document, 'vflist': vflist, 'flist1': outlist1,
                   'msg_success': msg_success, 'msg_error': msg_error,
                   'no_feature': no_feature, 'jopts': jopts},)
def result2(request, rid, oid, perm, disabled4reader):
    """Render the training result page for dataset `rid` (or option `oid`).

    Gathers sample files, cross-validation grid data, result-file flags
    (ROC/mrun/score/feat_coef/false_pred), excluded features from MongoDB,
    and perf/dataset metrics, then renders result.html (or result_opts.html
    when an option id is given).

    NOTE(review): reconstructed from whitespace-mangled source; verify branch
    nesting against version control.
    """
    print 'in result2, rid=', rid, ', oid=', oid
    o_rid = rid
    # get train option doc, if oid provided
    if oid > 0:
        rid = oid
    document = _list.get_ds_doc(rid, perm)
    if not document:
        return HttpResponseRedirect(reverse('atdml.views.list'))
    # for return only
    #form=DocumentForm()
    predictions = []  #Document.objects.all().filter(file_type="predict", train_id=rid).order_by('-id')[0:10]
    # get train option id
    train_id = document.train_id
    ml_lib = document.ml_lib
    status = document.status
    # get sample file list
    sflist = _predict.get_sfile_list(document.filename, document.id,
                                     document.file_type, train_id)  # how to get dir?
    # get cross validation info
    cv_grid_data, param_str, jopts = get_cv_grid(document, rid)
    print "************** ml_has_cv=", document.ml_has_cv, cv_grid_data
    if jopts:
        print "rid=", rid, ", jopts=", jopts
    else:
        print "rid=", rid, ", jopts not found"
    has_roc = has_result_file(rid, str(rid) + "_roc.json")
    has_mrun = has_result_file(rid, str(rid) + "_mrun.json")
    has_score = has_result_file(rid, str(rid) + "_score_graph.json")
    print "has_roc=", has_roc, ", has_mrun=", has_mrun, ", has_score=", has_score
    has_result = None
    # check algorithm
    train_opt = {}
    if not document.ml_opts is None and len(document.ml_opts) > 0:
        train_opt = json.loads(document.ml_opts)
    if document.status_code >= 500:
        # check if clustering data is in
        # NOTE(review): `in ('kmeans')` is substring membership on a string
        # (parens without comma) — likely intended as == 'kmeans'; confirm.
        if has_result_file(rid, str(rid) + "_cluster*.png"
                           ) and train_opt["learning_algorithm"] in ('kmeans'):
            has_result = "U"
        else:  # check if png for classification exists?
            has_result = "Y"
    elif ml_lib == "dnn":  # allow DNN to view status
        has_result = "Y"
    has_featc = has_result_file(rid, str(rid) + "_feat_coef.json")
    has_fp = has_result_file(rid, str(rid) + "_false_pred.json")
    # get ml_opts
    feature_excluded_list = None
    if "has_excluded_feat" in train_opt and train_opt["has_excluded_feat"] == 1:
        # get data from mongo.dataset_info
        try:
            doc = query_mongo.find_one(
                settings.MONGO_OUT_DNS, settings.MONGO_OUT_PORT,
                settings.MONGO_OUT_DB, settings.MONGO_OUT_TBL,
                settings.MONGO_OUT_USR, settings.MONGO_OUT_PWD,
                '{"rid":' + rid + ',"key":"feature_excluded"}', '{"value":1}')
            if not doc is None:
                #print "doc type=", type(doc), ",doc=",doc
                feature_excluded_list = doc["value"]
                print "feature_excluded_list=", feature_excluded_list
        except Exception as e:
            print "Exception from MongoDB:", e
    rpage = 'atdml/result.html'
    if oid > 0:
        rpage = 'atdml/result_opts.html'
    feat_str = ""
    if not feature_excluded_list is None:
        feat_str = ','.join(str(i) for i in feature_excluded_list)
    print "has_roc=", has_roc, ", has_mrun=", has_mrun, ", has_result=", has_result, "rpage=", rpage
    # get perf and dataset info (both stored as JSON strings, possibly "null")
    if document.perf_measures and document.perf_measures != "null":
        perf_measures = json.loads(document.perf_measures)
    else:
        perf_measures = {}
    if document.dataset_info and document.dataset_info != "null":
        dataset_info = json.loads(document.dataset_info)
    else:
        dataset_info = {}
    return render(
        request,
        #'atdml/result.html',
        rpage,
        {
            "document": document,
            "predictions": predictions,
            "sflist": sflist
            #, "form": form
            ,
            "disabled4reader": disabled4reader,
            "perm": perm,
            "cv_grid_data": cv_grid_data,
            "param_str": param_str,
            "has_fp": has_fp,
            "jopts": jopts,
            "has_roc": has_roc,
            "has_mrun": has_mrun,
            "has_result": has_result,
            "has_featc": has_featc,
            "has_score": has_score,
            "feature_excluded": feat_str,
            "ml_lib": ml_lib,
            "status": status,
            "tp": perf_measures["tp"] if "tp" in perf_measures else "",
            "tn": perf_measures["tn"] if "tn" in perf_measures else "",
            "fp": perf_measures["fp"] if "fp" in perf_measures else "",
            "fn": perf_measures["fn"] if "fn" in perf_measures else "",
            "phi": '%0.5f' % perf_measures["phi"] if "phi" in perf_measures else "",
            "fscore": '%0.5f' % perf_measures["fscore"] if "fscore" in perf_measures else "",
            "roc_auc": '%0.5f' % perf_measures["roc_auc"] if "roc_auc" in perf_measures else "",
            "class_count": dataset_info["class_count"] if "class_count" in dataset_info else "",
            "training_fraction": dataset_info["training_fraction"] if "training_fraction" in dataset_info else "",
            "dataset_count": dataset_info["dataset_count"] if "dataset_count" in dataset_info else "",
            "MEDIA_URL": settings.MEDIA_URL
        },
    )
def get_post_predict(request, rid, hash, perm, disabled4reader): print "in get_post_predict, hash=", hash, " user="******"not found!" #ret={"error":"data model not found!"} return Response({"error": "dataset not found"}, status=404) if hash: hash = hash.lower() if request.method == 'GET': print "In GET: rid=", rid, ",hash=", hash # by prediction if hash.isdigit(): doc = Document.objects.all().filter(file_type="predict", train_id=rid, id=hash) else: doc = Document.objects.all().filter(file_type="predict", train_id=rid, filename=hash) print "doc=", doc # get by md5/filename if len(doc) > 0: #slz = PredictSerializer(doc, many=True) #return Response(slz.data) return Response(ml_serializers.pred2json(doc)) return Response({"error": "prediction not found"}, status=404) action_type = 'hash_predict' if request.method == 'POST': print "in POST, list=", request.POST.get('list') verbose = request.POST.get('verbose') verbose = "0" if verbose is None else verbose print "verbose=", verbose host = request.POST.get('host') host = "" if host is None else host port = request.POST.get('port') port = "" if port is None else port db = request.POST.get('db') db = "" if db is None else db tbl = request.POST.get('tbl') tbl = "" if tbl is None else tbl usr = request.POST.get('usr') usr = "" if usr is None else usr pwd = request.POST.get('pwd') pwd = "" if pwd is None else pwd model_filename = request.POST.get('model_filename') model_filename = "" if model_filename is None else model_filename keep_flag = request.POST.get('keep_flag') keep_flag = "0" if keep_flag is None else keep_flag predict_list = [] pred_doc = [] # for offline massive prediction if hash == 'list_offline': hash_list = request.POST.get('list') feat_threshold = request.POST.get('feat_threshold') pred_doc = _predict.predict_massive(document, hash_list, host=host, port=port, db=db, tbl=tbl, usr=usr, pwd=pwd, model_filename=model_filename, keep_flag=keep_flag, feat_threshold=feat_threshold) return Response(pred_doc) # for ONE 
hash list elif 'list' in hash: hash_str = request.POST.get('list') if hash_str: hash_str = hash_str.lower() predict_list = hash_str.split(',') # get unique items predict_list = set(predict_list) # upload raw data for prediction elif 'raw' in hash: form = DocumentForm(request.POST, request.FILES) if form.is_valid(): print "in API upload predict" newdoc = Document(docfile=request.FILES['docfile']) newdoc.filename = request.FILES['docfile'] if document.file_type == 'ensemble': # upload binary for ensemble predict action_type = 'ensemble_predict' print "for ensemble predict..." else: action_type = 'upload_predict' predict_list.append(newdoc.filename) print "newdoc.filename=", newdoc.filename else: print "Form is invalid!" return Response({"Error": "invalid form"}, status=404) # upload binary for sandbox execution & predict elif hash == 'exec': form = DocumentForm(request.POST, request.FILES) if form.is_valid(): print "in API upload for execution & wait:" exe_type = request.POST.get('_file_type') # handle by _predict; key field is "_file_type" if exe_type is None: print "required field not found!" return Response({"Error": "required field not found."}, status=404) else: return _predict.predict(request, rid, cid=None, msg_id=None, perm=perm, disabled4reader=disabled4reader) else: print "Form is wrong!!" return Response({"Error": "invalid form"}, status=404) else: predict_list.append(hash) # TBD need to check upload count here? 
for p_item in predict_list: # create newdoc if action_type == "hash_predict": newdoc = Document() newdoc.filename = p_item newdoc.submitted_by = request.user.username newdoc.acl_list = perm newdoc.train_id = str(rid) if action_type == "ensemble": newdoc.file_type = "ensemble_predict" else: newdoc.file_type = "predict" newdoc.ml_n_gram = document.ml_n_gram newdoc.ml_opts = document.ml_opts newdoc.ml_lib = document.ml_lib newdoc.db_host = host newdoc.db_db = db newdoc.db_port = port newdoc.db_tbl = tbl newdoc.save() cid = newdoc.id #upload_fname=p_item #print "before predict_hash *************** " ret = _predict.predict_hash(document, newdoc, p_item, tlabel="", action_type=action_type, host=host, port=port, db=db, tbl=tbl, usr=usr, pwd=pwd, verbose=verbose) #print 'in POST: ret=', ret pred_doc.append(ret) return Response(pred_doc) else: return Response({"error": "data not found"}, status=404)
def predict(request, rid, cid, msg_id, perm, disabled4reader):
    """Prediction page/endpoint for dataset `rid`.

    POST: create a prediction record (file upload, hash predict, or sample
    predict via ajax), invoke the prediction script, and answer either JSON
    (API / ajax callers) or fall through to the rendered page.
    GET / other: render 'atdml/predict.html' with recent predictions.
    `cid` and `msg_id` are used on GET to echo the outcome of a prior POST.
    """
    print 'in _predict.predict(), rid=', rid
    # get perm
    #uname,grp,perm,disabled4reader=get_perm(request)
    document = _list.get_ds_doc(rid, perm)
    if not document:
        # dataset not visible for this perm -> back to the list page
        return HttpResponseRedirect(reverse('atdml.views.list'))
    # dataset's type:
    ds_ftype = document.file_type
    msg_error = ""
    msg_success = ""
    tlabel = ""
    #print 'hello2:', request.method
    # set message for GET: translate msg_id from the redirect into user text
    if msg_id == "201":
        msg_success = settings.MSG_PREDICT_SUCCESS + " Id=" + str(cid)
    elif msg_id == "205":
        msg_success = settings.MSG_PREDICT_APK_UPLOAD_SUCCESS + " Id=" + str(
            cid)
    elif msg_id == "90201":
        msg_error = settings.MSG_PREDICT_FAILED
    elif msg_id == "90202":
        msg_error = settings.MSG_PREDICT_DUPLICATED
    elif msg_id == "90901":
        msg_error = settings.MSG_RECAPTCHA_FAILED
    elif msg_id and "90902" in msg_id:
        # msg_id format here is "90902.<count>"
        arr = msg_id.split('.')
        if len(arr) > 1:
            # append count to the end
            msg_error = settings.MSG_UPLOAD_OVER_MAX + " " + arr[1]
        else:
            msg_error = settings.MSG_UPLOAD_OVER_MAX
    # predict action
    action_type = request.POST.get('_action_type')
    print 'action_type=', action_type
    upload_fname = ""
    newdoc = None
    # for return only
    form = DocumentForm()
    if request.method == 'POST':  # =========== =============
        print 'in predict POST'
        # defaults come from the dataset record; hash_predict may override
        dns = document.db_host
        port = document.db_port
        db = document.db_db
        tbl = document.db_tbl
        hash = ""
        usr = ""
        pwd = ""
        n_gram = document.ml_n_gram
        opt_str = document.ml_opts
        lib = document.ml_lib
        db_proj = document.db_proj if document.db_proj else ""
        pattern = document.pattern
        pca_opts = document.ml_pca_opts
        ml_feat_threshold = request.POST.get('_feat_threshold')
        if ml_feat_threshold is None or ml_feat_threshold == "":
            ml_feat_threshold = document.ml_feat_threshold
        ds_list = document.ds_list
        if pca_opts is None:
            pca_opts = ""
        if pattern is None:
            pattern = ""
        # find parent dataset id
        ds_id = document.train_id
        if ds_id is None or ds_id == "None" or document.option_state == "new_featuring":
            ds_id = str(rid)  # use self's feature list, if is a feature option
            #print "hihi"
        ds_id = str(ds_id)
        exe_type = request.POST.get('_file_type')
        if not exe_type is None:
            exe_type = exe_type.lower()
        emulater_config = ""
        from_api = None
        # NOTE(review): if '_file_type' is absent, exe_type is None and the
        # membership test below raises TypeError — confirm callers always
        # send '_file_type' on POST.
        if "apk" in exe_type and action_type is None:
            print "_predict.predict() in apk"
            action_type = 'upload_predict'
            # for upload apk for execution from API
            from_api = "y"
        # upload a file to predict
        if action_type == 'upload_predict':
            form = DocumentForm(request.POST, request.FILES)
            print "exe_type=", exe_type
            if form.is_valid():
                newdoc = Document(docfile=request.FILES['docfile'])
                newdoc.filename = request.FILES['docfile']
                emulater_config = request.POST.get('_emulater_config')
                pert_flag = None
                print "emulater_config=", emulater_config
                # flag for sandbox execution
                if "apk" in exe_type:
                    if "dynamic" in exe_type:
                        newdoc.status = "apk_queued"
                        newdoc.desc = "has_exe_log"  # flag for apk execution log
                    elif "static" in exe_type:
                        newdoc.desc = "apk static"  # check if static apk,
                elif "image" in exe_type:
                    newdoc.file_type = "image_predict"
                    action_type = exe_type
                    pert_flag = request.POST.get('_pert_flag')
                elif document.file_type == "ensemble":
                    # special type for ensemble
                    action_type = "ensemble"
                    newdoc.file_type = "ensemble_predict"
            else:  # form not valid ========== ====
                print 'invalid form'
                form = DocumentForm()
        elif action_type == 'hash_predict':
            # predict by hash; db connection info comes from the form
            hash = request.POST.get('_hash')
            if hash:
                hash = hash.lower()
            dns = request.POST.get("_dns")
            port = request.POST.get("_port")
            db = request.POST.get('_db')
            tbl = request.POST.get('_tbl')
            usr = request.POST.get('_username')
            pwd = request.POST.get('_password')
            print "_hash=", hash
            print "dns=", dns, "_db=", db
            newdoc = Document()
            newdoc.filename = hash
            upload_fname = hash
            newdoc.db_host = dns
            newdoc.db_db = db
            newdoc.db_port = port
            newdoc.db_tbl = tbl
        else:  # ajax; for sample predict
            sname = request.POST.get('filename')
            #print 'sname=',sname
            # true label is taken from the filename extension
            idx = sname.rindex('.')
            if idx > 0:
                tlabel = sname[idx + 1:].lower().strip()
            print 'label=' + tlabel + "<==="
            newdoc = Document(docfile=sname)
            newdoc.filename = sname.strip()
            newdoc.true_label = tlabel
        newdoc.submitted_by = request.user.username
        newdoc.acl_list = perm
        if newdoc.file_type is None:
            newdoc.file_type = "predict"  # TBD
        newdoc.ml_pca_opts = pca_opts
        newdoc.ml_feat_threshold = ml_feat_threshold
        if newdoc.docfile:
            upload_fname = newdoc.docfile.name
        #print "docfile.name=", newdoc.docfile.name
        #print "newdoc.filename=", newdoc.filename
        print "upload_fname=", upload_fname
        #print "********************"
        newdoc.train_id = rid
        newdoc.save()
        filename = document.filename  # parent filename
        fnumb = str(document.total_feature_numb)
        cid = str(newdoc.id)
        verbose = "1"  # default to generate feature list
        # NOTE(review): pert_flag is only bound on the upload_predict/valid-form
        # path; the hash_predict and ajax paths reach this call with pert_flag
        # unbound (NameError) — confirm and initialize it before the branches.
        (ret, msg_id, msg)=invoke_pred_script(rid, ds_id, cid, tlabel, upload_fname, filename, fnumb, action_type, ds_ftype \
            , dns, port, db, tbl, usr, pwd, db_proj, hash, n_gram, opt_str, lib, pattern, verbose, pca_opts, exe_type, emulater_config \
            , ml_feat_threshold, ds_list=ds_list, pert_flag=pert_flag)
        print "msg_id=", msg_id, ", msg=" + msg
        # for API
        if from_api == "y":
            print "_predict.predict() in from_api:"
            # re-read the record to pick up status set by the script
            newdoc = Document.objects.get(id=cid)
            wdoc = {
                "id": cid,
                "status": newdoc.status,
                "pdate": newdoc.local_processed_date(),
                "by": newdoc.submitted_by,
                "filename": newdoc.filename,
                "true_label": newdoc.true_label,
                "msg": msg,
                "prediction": newdoc.prediction,
                "msg_id": msg_id,
                "predict_val": newdoc.predict_val,
                "train_id": newdoc.train_id,
                "feat_list": ""
            }
            return Response([wdoc])  # keep same format as regular pred output
        if request.is_ajax():
            print "Ajax predict************"
            if msg_id == "90201" or msg_id == "90205":
                # prediction failed/duplicated -> 400 with message
                print "cid=", cid, ", msg_id=", msg_id
                #ret_msg=msg_error
                ret_data = {
                    "msg": msg + " Id=" + str(cid) + ", filename=[" +
                    newdoc.filename + "]"
                }
                print "ret_data", ret_data
                return HttpResponse(json.dumps(ret_data),
                                    content_type="application/json",
                                    status=400)
            #else:
            #    ret_msg=msg_success
            #print "ret_msg="+ret_msg
            newdoc = Document.objects.get(id=cid)
            ret_data = {
                "id": cid,
                "status": newdoc.status,
                "pdate": newdoc.local_processed_date(),
                "by": newdoc.submitted_by,
                "filename": newdoc.filename,
                "true_label": newdoc.true_label,
                "msg": msg,
                "prediction": newdoc.prediction
            }
            print "json dump=" + json.dumps(ret_data)
            return HttpResponse(json.dumps(ret_data),
                                content_type="application/json")
    elif request.method == 'GET':  # =========== =============
        print 'in _predict.predict2 GET'
        param_str = document.ml_opts
        try:
            jopts = json.loads(document.ml_opts)
        except:
            jopts = {}
    else:  # not POST ========== ====
        print 'not post'
    # common page-render path (non-ajax POST falls through to here too)
    print "echo msg_error=" + msg_error, ", msg_success=" + msg_success
    predictions = Document.objects.all().filter(
        Q(file_type__icontains="predict"),
        train_id=rid).order_by('-id')[0:100]
    print "pred len=", len(predictions)
    # get sample file list
    ds_id = document.train_id
    if (rid == ds_id or document.option_state == "new_featuring"):
        ds_id = rid  # use self's feature list
    sflist = get_sfile_list(document.filename, document.id,
                            document.file_type, ds_id)
    # how to get dir?
    jopts = document.ml_opts
    pca_jopts = document.ml_pca_opts
    if pca_jopts:
        pca_jopts = json.loads(document.ml_pca_opts)
    if jopts:
        jopts = json.loads(document.ml_opts)
        # prettify algorithm name for display, e.g. "logistic_regression"
        # -> "Logistic Regression"
        jopts["learning_algorithm"] = jopts["learning_algorithm"].title(
        ).replace("_", " ")
    #print "has_roc=",has_roc,", has_mrun=",has_mrun
    recaptcha = settings.RECAPTCHA_PREDICT
    if recaptcha is None:
        recaptcha = "N"
    return render(
        request,
        'atdml/predict.html',
        {
            'document': document,
            'form': form,
            'predictions': predictions,
            'disabled4reader': disabled4reader,
            'perm': perm,
            'msg_error': msg_error,
            'msg_success': msg_success,
            'sflist': sflist,
            "jopts": jopts,
            "pca_jopts": pca_jopts,
            "MEDIA_URL": settings.MEDIA_URL,
            "use_recaptcha": recaptcha
        },
    )
def exclude_feature(request,rid, perm,disabled4reader): print "in exclude_feature" msg_id=None document =_list.get_ds_doc(rid, perm) if not document: return HttpResponseRedirect(reverse('list')) # find parent dataset id train_id=document.train_id if train_id is None: train_id=rid excl_feat=None if request.method == 'POST': excl_feat=request.POST.get('hf_w_excl_feat') print "excl_feat=",excl_feat json2save={} fid_dict={} ml_opts=json.loads(document.ml_opts) has_excl_key=0 # check if key exists if "has_excluded_feat" in ml_opts: has_excl_key=1 print "ml_opts=",ml_opts," type=", type(ml_opts),", excl_feat=",excl_feat fid_arr=[] # build dict for excluded feature if not excl_feat is None and len(excl_feat)>0: fid_arr=excl_feat.split(',') # update ml_opts ml_opts["has_excluded_feat"]=1 has_excl_key=1 else: # excl feat was removed if has_excl_key==1: ml_opts["has_excluded_feat"]=0 # only update if excl key exists if has_excl_key==1: # update ml_opts document.ml_opts=json.dumps(ml_opts) #print "ml_opts str=",json.dumps(ml_opts) document.save() # save exclude list to mongo json2save["rid"]=eval(rid) json2save["key"]="feature_excluded" json2save["value"]=fid_arr feat_excl=json.dumps(json2save) #print "feature_excluded=",feat_excl filter='{"rid":'+rid+',"key":"feature_excluded"}' upsert_flag=True #print "filter=",filter,",feat_excl=",feat_excl ## write to mongoDB.myml.dataset_info, ignore doc with duplicated key ret=query_mongo.upsert_doc(settings.MONGO_OUT_DNS, settings.MONGO_OUT_PORT, settings.MONGO_OUT_DB, settings.MONGO_OUT_TBL , settings.MONGO_OUT_USR, settings.MONGO_OUT_PWD ,filter,feat_excl,upsert_flag) print "Upsert count for feat_excl: ret=",ret return HttpResponseRedirect(reverse('result_opts',args=[train_id,rid]))