def recognize(train_file, test_file):
    print("1.Parsing sets")
    datalist = loadtxt(open(train_file, 'r'), dtype='f8', delimiter=',', skiprows=1)
    joblib.dump(datalist, 'training_set.pkl')
    datalist = joblib.load('training_set.pkl')
    label = [x[0] for x in datalist]
    train = [x[1:] for x in datalist]
    test = loadtxt(open(test_file, 'r'), dtype='f8', delimiter=',', skiprows=1)
    joblib.dump(test, 'test_set.pkl')
    test = joblib.load('test_set.pkl')
    print("2.Create and train RF")
    temp = RandomForestClassifier(n_estimators=100, n_jobs=4)
    cv = cross_validation.KFold(len(train), n_folds=5, indices=True)
    scores = []
    for train_indices, test_indices in cv:
        print('Train: %s | test: %s' % (len(train_indices), len(test_indices)))
        trainfit = train[train_indices[0]:train_indices[-1] + 1]
        traintest = train[test_indices[0]:test_indices[-1] + 1]
        labelfit = label[train_indices[0]:train_indices[-1] + 1]
        labeltest = label[test_indices[0]:test_indices[-1] + 1]
        scores.append(temp.fit(trainfit, labelfit).score(traintest, labeltest))
    print("Accuracy: " + str(np.array(scores).mean()))
def cities_output():
    user = request.args.get('ID')
    user = re.match('@?(.*)', user).groups()[0]
    cur = db.cursor()
    cur.execute("SELECT TWEET FROM tweets_by_user WHERE HANDLE='%s';" % user)
    query_results = np.array(cur.fetchall())
    if len(query_results) == 0:
        try:
            stuff = api.user_timeline(screen_name=user, count=8000, include_rts=False)
            for status in stuff:
                tweet = status.text
                screen_name = user
                cur.execute("INSERT INTO tweets_by_user (HANDLE, TWEET) VALUES (%s,%s)",
                            (screen_name, tweet))
                db.commit()
            cur.execute("SELECT TWEET FROM tweets_by_user WHERE HANDLE='%s';" % user)
            query_results = np.array(cur.fetchall())
        except:
            return render_template('not_exist.html', user=user)
    tweets = [t[0] for t in query_results]
    vectorizer = joblib.load('/home/jtsitr/twitter_project/vectorizer.pkl')
    clf = joblib.load('/home/jtsitr/twitter_project/clf.pkl')
    tweets = vectorizer.transform(tweets)
    prediction = clf.predict(tweets)
    if tweets.shape[0] < 100:
        return render_template('not_enough_tweets.html', user=user)
    else:
        the_result = np.mean(prediction)
        try:
            return render_template("output_final.html", the_result=the_result, user=user)
        except Exception as e:
            return render_template('500.html', error=str(e))
def fetch_vgg_architecture(caffemodel_parsed=None, caffemodel_protobuffer=None):
    """Fetch a pickled version of the caffe model, represented as list of dictionaries."""
    default_filename = os.path.join(VGG_PATH, 'vgg.pickle')
    if caffemodel_parsed is not None:
        if os.path.exists(caffemodel_parsed):
            return joblib.load(caffemodel_parsed)
        else:
            if os.path.exists(default_filename):
                import warnings
                warnings.warn('Did not find %s, but found %s. Loading it.' %
                              (caffemodel_parsed, default_filename))
                return joblib.load(default_filename)
    else:
        if os.path.exists(default_filename):
            return joblib.load(default_filename)

    # We didn't find the file: let's create it by parsing the protobuffer
    protobuf_file = fetch_vgg_protobuffer_file(caffemodel_protobuffer)
    model = _parse_caffe_model(protobuf_file)

    if caffemodel_parsed is not None:
        joblib.dump(model, caffemodel_parsed)
    else:
        joblib.dump(model, default_filename)

    return model
def predict(self, img_path):
    img, positions, pix_data, captcha_type = self.read_img(img_path)
    print positions, captcha_type
    if positions is None:
        print('Image segmentation failed!')
        return None
    x = np.array(self.get_pix_list(pix_data, positions, captcha_type))
    if captcha_type == 'number':
        if self.model is None or os.path.isfile(self.number_model_file):
            self.model = joblib.load(self.number_model_file)
        else:
            raise IOError
    elif self.model is None or os.path.isfile(self.symbol_model_file):
        self.model = joblib.load(self.symbol_model_file)
    else:
        raise IOError
    predict_label = list()
    for i in range(x.shape[0]):
        input = x[i, :]
        predict_y = self.model.predict(input)[0]
        if int(predict_y) >= len(self.number_label_list) or int(predict_y) < 0:
            return "", ""
        if captcha_type == 'number':
            predict_label.append(self.number_label_list[predict_y])
        else:
            predict_label.append(self.symbol_label_list[predict_y])
    return u"".join(predict_label), self.__caculate(predict_label, captcha_type)
def load_model(self, path):
    self.clf = joblib.load(os.path.join(path, 'model.pkl'))
    with open(os.path.join(path, 'labels.json'), 'r') as fo:
        self.labels = Alphabet.from_dict(json.load(fo))
    with open(os.path.join(path, 'model_info.json'), 'r') as fo:
        self.model_info = json.load(fo)
    self.features = joblib.load(os.path.join(path, 'featvec.pkl'))
def __init__(self):
    self.pca = joblib.load("result/pca_model.m")
    self.scaler = joblib.load("result/scale_model.m")
    with open("result/A_con.pkl", "rb") as f:
        self.A = pickle.load(f)
    with open("result/G_con.pkl", "rb") as f:
        self.G = pickle.load(f)
def selectFeatures(X, t=0):
    if t == 0:
        selector = joblib.load('selector.pkl')
    else:
        selector = joblib.load('SelectKBest.pkl')
    X_new = selector.transform(X)
    return X_new
def predict(filein_name):
    """Predict."""
    filein_name = '0908-12.txt'
    # get models
    from sklearn.externals import joblib
    LR010 = joblib.load('0903_uid_ave_010.pkl')
    LR001 = joblib.load('0903_uid_ave_001.pkl')
    LR100 = joblib.load('0903_uid_ave_100.pkl')
    import scipy.io as sio
    X = sio.loadmat('uid_dict_X001-12.mat')['X']
    y_predict_prob = LR001.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y001.mat', {'y': y_predict_prob})
    X = sio.loadmat('uid_dict_X010-12.mat')['X']
    y_predict_prob = LR010.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y010.mat', {'y': y_predict_prob})
    X = sio.loadmat('uid_dict_X100-12.mat')['X']
    y_predict_prob = LR100.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y100.mat', {'y': y_predict_prob})
def train_pipeline(kind, cut, vectorizer, model_trainer, do_cut=False, do_vectorizer=False, record_num=None):
    print('reading...')
    alltext, accu_label, law_label, time_label = data.read_trainData("./data/data_train.json", record_num)
    if do_cut:
        print('cutting...')
        train_text = cut.cut(alltext)
        joblib.dump(train_text, './data/{}_cut_train.txt'.format(cut.name))
        print('cleaning...')
        cleaner = Cleaner()
        cleaned_train_text = cleaner.clean(train_text)
        joblib.dump(cleaned_train_text, './data/{}_cut_train_cleaned.txt'.format(cut.name))
    else:
        print('load existing cut file {}...'.format('./data/{}_cut_train_cleaned.txt'.format(cut.name)))
        cleaned_train_text = joblib.load('./data/{}_cut_train_cleaned.txt'.format(cut.name))
    vectorizer_name = '{}_{}'.format(cut.name, vectorizer.name)
    if do_vectorizer:
        print('{} training...'.format(vectorizer_name))
        vectorizer = vectorizer.train(cleaned_train_text)
        joblib.dump(vectorizer, './model/{}/predictor/model/{}_vectorizer.model'.format(model_trainer.name, vectorizer_name))
        print('{} vectorizing...'.format(vectorizer))
        vec = vectorizer.transform(cleaned_train_text)
        joblib.dump(vec, './data/vec_{}.txt'.format(vectorizer_name))
    else:
        print('load existing vec file {}...'.format('./data/vec_{}.txt'.format(vectorizer_name)))
        vec = joblib.load('./data/vec_{}.txt'.format(vectorizer_name))
    print('{} training...'.format(kind))
    model = model_trainer.train(vec, accu_label)
    joblib.dump(model, './model/{}/predictor/model/{}_{}.model'.format(model_trainer.name, vectorizer_name, kind))
def CV_trainModel():
    # data preprocessing
    label_has = joblib.load('data/label_has.pkl')
    traindata_has = joblib.load('data/data_has.pkl')
    label_no = joblib.load('data/label_no.pkl')
    traindata_no = joblib.load('data/data_no.pkl')
    traindata = np.vstack([traindata_has, traindata_no])
    labels = np.hstack([label_has, label_no])
    # print traindata.dtype
    # print labels.dtype
    traindata = np.float32(traindata)
    labels = np.int32(labels)
    model = SVM(C=1.0, gamma=1.0)
    model.train(traindata, labels)
    model.save('model/svm.dat')
    # model.load('model/svm.dat')
    return model
def train_classifier():
    pos_feat_path = positive_features_path
    neg_feat_path = negative_features_path
    model_path = classifier_model_path

    feature_vectors = []
    labels = []

    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(1)

    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(0)

    classifier = LinearSVC()
    print "Training classifier"
    classifier.fit(feature_vectors, labels)
    print "Classifier successfully trained"
    if not os.path.isdir(os.path.split(model_path)[0]):
        os.makedirs(os.path.split(model_path)[0])
    joblib.dump(classifier, model_path)
def init_api(app, es_util):
    # load the pickled model, vectorizer and label encoder
    model = joblib.load(r'app\rules\modelspkl\knowledge_cart.pkl')
    tf_transformer = joblib.load(r'app\rules\modelspkl\knowledge_tf_transformer.pkl')
    le = joblib.load(r'app\rules\modelspkl\knowledge_labelencoder.pkl')

    @app.route('/ml_error_predict_batch', methods=['POST'])
    def error_predict():
        parmStr = request.get_data()
        paramDict = json.loads(parmStr)
        testline = paramDict['index']
        """
        Get the user info
        :return: json
        """
        auth = Auth()
        result = auth.identify(request)
        if result['status']:
            test_datas = readDataLine(testline)
            if test_datas != None:
                test_feature_datas = tf_transformer.transform(test_datas)
                # run the prediction
                pred = model.predict(test_feature_datas)
                result = le.inverse_transform(pred)
                print('Prediction result: %s, input: %s' % (result[0], testline))
                # logging.debug('Prediction result: %s, input: %s' % (result[0], testline))
                return result[0]
            return "this is None"
        else:
            return jsonify(result)
def trainModel():
    # data preprocessing
    data_train = joblib.load('data/data_train.pkl')
    label_train = joblib.load('data/label_train.pkl')
    print data_train.shape
    clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.1,
                  degree=0.1, gamma=1.0, kernel='rbf', max_iter=-1,
                  probability=False, random_state=None, shrinking=True,
                  tol=0.001, verbose=True)
    # clf.set_params(kernel='rbf')
    print clf
    print data_train.shape
    print label_train.shape
    print 'begin training....'
    clf.fit(data_train, label_train)
    print 'finish training....'
    print clf
    joblib.dump(clf, 'model/svm.pkl')
    return None
def varianceInProductGroups(df):
    nonlocal state
    print("Making: varianceInProductGroups")
    if state == 1 and os.path.exists('pickleFiles/colorStd.pkl') and os.path.exists('pickleFiles/sizeStd.pkl'):
        sizeStd = joblib.load('pickleFiles/sizeStd.pkl')
        colorStd = joblib.load('pickleFiles/colorStd.pkl')
    elif state == 0 and os.path.exists('pickleFiles/colorStd_test.pkl') and os.path.exists('pickleFiles/sizeStd_test.pkl'):
        sizeStd = joblib.load('pickleFiles/sizeStd_test.pkl')
        colorStd = joblib.load('pickleFiles/colorStd_test.pkl')
    else:
        products = df.groupby('productGroup')
        sizeStd, colorStd = {}, {}
        for idx, product in products:
            if idx not in sizeStd or idx not in colorStd:
                size = np.std(list(Counter(product['sizeCode']).values()))
                color = np.std(list(Counter(product['colorCode']).values()))
                sizeStd[idx] = size
                colorStd[idx] = color
        if state == 1:
            joblib.dump(sizeStd, 'pickleFiles/sizeStd.pkl')
            joblib.dump(colorStd, 'pickleFiles/colorStd.pkl')
        else:
            joblib.dump(sizeStd, 'pickleFiles/sizeStd_test.pkl')
            joblib.dump(colorStd, 'pickleFiles/colorStd_test.pkl')
    df['sizeStd'] = df['productGroup'].map(sizeStd)
    df['colorStd'] = df['productGroup'].map(colorStd)
    return df
def event2semsim(event):
    import os
    from sklearn.externals import joblib
    if isinstance(event, str):
        etype = event
    else:
        etype = event.type
    if etype == "accident":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "accidents.norm-stem.lam20.000.pkl"))
    elif etype == "earthquake" or etype == "storm" or etype == "impact event":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "natural-disasters.norm-stem.lam20.000.pkl"))
    elif etype == "protest" or etype == "riot":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "social-unrest.norm-stem.lam1.000.pkl"))
    elif etype == "shooting" or etype == "bombing" or etype == "conflict" or \
            etype == "hostage":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "terrorism.norm-stem.lam10.000.pkl"))
def averageColor(df):
    nonlocal state
    print("Making: averageColor")
    if state == 1 and os.path.exists('pickleFiles/averageColor.pkl'):
        averageColor = joblib.load('pickleFiles/averageColor.pkl')
    elif state == 0 and os.path.exists('pickleFiles/averageColor_test.pkl'):
        averageColor = joblib.load('pickleFiles/averageColor_test.pkl')
    else:
        allColor = {}
        # find all the colours that customers buy
        for i in df.index:
            currCustomer = df['customerID'][i]
            if currCustomer not in allColor:
                allColor[currCustomer] = [df['colorCode'][i]]
            else:
                allColor[currCustomer].append(df['colorCode'][i])
        averageColor = {}
        for entry in allColor:
            if entry not in averageColor:
                averageColor[entry] = np.mean(allColor[entry])
        if state == 1:
            joblib.dump(averageColor, 'pickleFiles/averageColor.pkl')
        else:
            joblib.dump(averageColor, 'pickleFiles/averageColor_test.pkl')
    avgcolor = pd.Series(name='averageColor', index=df.index)
    for i in df.index:
        customer = df['customerID'][i]
        avgcolor.set_value(i, averageColor[customer])
    df['averageColor'] = avgcolor
    return df
def cheapskateItems(df):
    nonlocal state
    print("Making: cheapskateItems")
    if state == 1 and os.path.exists('pickleFiles/voucherToArticle.pkl'):
        voucherDic = joblib.load('pickleFiles/voucherToArticle.pkl')
    elif state == 0 and os.path.exists('pickleFiles/voucherToArticle_test.pkl'):
        voucherDic = joblib.load('pickleFiles/voucherToArticle_test.pkl')
    else:
        voucherDic = {}
        vouchers = df.groupby('voucherID')
        for idx, voucher in vouchers:
            if idx not in voucherDic:
                voucherDic[idx] = Counter(voucher['articleID']).most_common()[0][0]
        if state == 1:
            joblib.dump(voucherDic, 'pickleFiles/voucherToArticle.pkl')
        else:
            joblib.dump(voucherDic, 'pickleFiles/voucherToArticle_test.pkl')
    articleSet = set(voucherDic.values())
    cheapArticle = pd.Series(name='cheapArticle', index=df.index)
    for i in df.index:
        article = df['articleID'][i]
        isCheap = 1 if article in articleSet else 0
        cheapArticle.set_value(i, isCheap)
    df['cheapArticle'] = cheapArticle
    return df
def colorPopularity(df):
    print('Making: colorPopularity')
    nonlocal state
    if state == True and os.path.exists('pickleFiles/colorMap.pkl'):
        colorMap = joblib.load('pickleFiles/colorMap.pkl')
    elif state == False and os.path.exists('pickleFiles/colorMap_test.pkl'):
        colorMap = joblib.load('pickleFiles/colorMap_test.pkl')
    else:
        colorCount = Counter(df['colorCode'])
        popularColors = [i[0] for i in colorCount.most_common(5)]
        shittyColors = [j[0] for j in colorCount.most_common()[::-1] if j[1] < 5]
        colorMap = {}
        for color in df['colorCode']:
            if color not in colorMap:
                if color in popularColors:
                    colorMap[color] = "popular"
                elif color in shittyColors:
                    colorMap[color] = "unpopular"
                else:
                    colorMap[color] = "neutral"
        if state == True:
            joblib.dump(colorMap, 'pickleFiles/colorMap.pkl')
        else:
            joblib.dump(colorMap, 'pickleFiles/colorMap_test.pkl')
    df['colorPopularity'] = df['colorCode'].map(colorMap)
    return df
def modeSize(df):
    nonlocal state
    print('Making: mostFrequentSize and differenceSize')
    if state == 1 and os.path.exists('pickleFiles/modeSizesBought.pkl'):
        modeSizeData = joblib.load('pickleFiles/modeSizesBought.pkl')
    elif state == 0 and os.path.exists('pickleFiles/modeSizesBought_test.pkl'):
        modeSizeData = joblib.load('pickleFiles/modeSizesBought_test.pkl')
    else:
        allSize = {}
        for i in df.index:
            # find all sizes purchased by customers
            currCust = df['customerID'][i]
            if currCust not in allSize:
                allSize[currCust] = [df['sizeCode'][i]]
            else:
                allSize[currCust].append(df['sizeCode'][i])
        modeSize = {}
        for customer in allSize:
            if customer not in modeSize:
                mode = Counter(allSize[customer]).most_common(1)[0][0]
                modeSize[customer] = mode
        if state == 1:
            joblib.dump(modeSize, 'pickleFiles/modeSizesBought.pkl')
        else:
            joblib.dump(modeSize, 'pickleFiles/modeSizesBought_test.pkl')
        modeSizeData = modeSize
    mostFrequentSize = pd.Series(name='mostFrequentSize', index=df.index)
    for i in df.index:
        customer = df['customerID'][i]
        mostFrequentSize.set_value(i, modeSizeData[customer])
    df['modeSize'] = mostFrequentSize
    df['differenceModeSize'] = abs(mostFrequentSize - df['sizeCode'])
    return df
def callback(ch, method, properties, body):
    print(" [x] Received %r" % (body,))
    answer = dict()
    message = str(body)
    features = calculate_features(message[2:-2])
    features = np.array(features).reshape(1, -1)
    # scaler = StandardScaler()
    # features = scaler.fit_transform(features)
    file = 'Resources/AGE_model.pkl'
    age_model = joblib.load(file)
    file = 'Resources/GENDER_model.pkl'
    sex_model = joblib.load(file)
    answer['age'] = [age_model.predict(features)]
    age = answer['age'][0][0:1][0]
    age = str(age).replace("\r", "")
    age = str(age).replace("\n", "")
    answer['gender'] = [sex_model.predict(features)]
    gender = answer['gender'][0][0:1][0]
    print('Age: ' + str(age) + ', gender: ' + str(gender))
    print(" [x] Done")
    ch.basic_ack(delivery_tag=method.delivery_tag)
def predict_category_subcategory(book_name):
    data_set1 = pandas.Series(book_name.encode('ascii'))
    # Data Preprocessing
    data_set1 = data_set1.dropna(axis=0, how='any')
    data_set1 = data_set1.str.lower()
    # Manual removal List
    remove_list = ['edition', 'ed', 'edn', 'vol', 'vol.', '-', 'i']
    data_set1[0] = ' '.join([i for i in data_set1[0].split() if i not in remove_list])
    data_set1 = data_set1.apply(lambda x: re.sub(r'\w*\d\w*', '', x).strip())
    data_set1 = data_set1.apply(lambda x: re.sub(r'\([^)]*\)', ' ', x))
    data_set1 = data_set1.apply(lambda x: re.sub('[^A-Za-z0-9]+', ' ', x))
    # data_set['Category ID'] = data_set['Category ID']+"|"+data_set['Subcategory ID']
    # Stemming the book titles
    stemmer = LancasterStemmer()
    data_set1[0] = " ".join([stemmer.stem(i) for i in data_set1[0].split()])
    clf = joblib.load(os.path.join(BASE_DIR + "/learners/", 'category_predict.pkl'))
    ans = clf.predict(data_set1)
    sub_clf = joblib.load(os.path.join(BASE_DIR + "/learners/", 'subcategory_predict.pkl'))
    sub_ans = sub_clf.predict(data_set1)
    return [ans[0], sub_ans[0]]
def put(self):
    startgeocode_json = (request.form['startgeocode'])
    endgeocode_json = (request.form['endgeocode'])
    tripdistance_json = (request.form['tripdistance'])
    import json
    startgeocode = json.loads(startgeocode_json)
    endgeocode = json.loads(endgeocode_json)
    startlat = float(startgeocode['lat'])
    startlng = float(startgeocode['lng'])
    endlat = float(endgeocode['lat'])
    endlng = float(endgeocode['lng'])
    tripdist = float(tripdistance_json.split(" ")[0])
    hour = int(request.form['hour'])
    dayofweek = int(request.form['dayofweek'])
    lowspeedclf = joblib.load(os.path.join(APP_STATIC, 'costtime.pkl'))
    lowspeedx = [startlat, startlng, endlat, endlng, hour, dayofweek, tripdist]
    lowspeedy = int(lowspeedclf.predict(lowspeedx)[0])
    tripdurationclf = joblib.load(os.path.join(APP_STATIC, 'trip_duration.pkl'))
    tripduration_x = lowspeedx
    trip_duration_y = tripdurationclf.predict(tripduration_x)[0]
    duration_list = []
    lowspeed_list = []
    for i in range(24):
        x = [startlat, startlng, endlat, endlng, i, dayofweek, tripdist]
        duration_list.append([i, int(tripdurationclf.predict(x)[0])])
        lowspeed_list.append([i, int(lowspeedclf.predict(x)[0])])
    return {"lowspeedtime": lowspeedy,
            "tripduration": trip_duration_y,
            "duration_list": duration_list,
            "lowspeed_list": lowspeed_list}
def train_and_single_label(train_filename, test_filename, clf, pickled):
    """ Only return one example ID for each q_id """
    if pickled:
        train_data = joblib.load(train_filename)
        test_data = joblib.load(test_filename)
    else:
        train_data = extract_ibm_data(train_filename)
        test_data = extract_ibm_data(test_filename, test_file=True)
    X = train_data["data"]
    y = train_data["target"]
    clf.fit(X, y)
    labels = clf.predict(test_data["data"])
    # now manipulate the results using test_data['q_id'] to filter the labels
    ## NEW CODE:
    used_qids = []
    results = []
    for i in range(len(labels)):
        if labels[i] == "true":
            if not test_data["q_id"][i] in used_qids:
                results.append(test_data["id"][i])
                used_qids.append(test_data["q_id"][i])
    return results
def loadModule(mode):
    global movieReviewer
    try:
        movieReviewer = joblib.load("./SVM/movieReviewer%s.svm" % mode)
    except:
        import SVMTrain
        movieReviewer = joblib.load("./SVM/movieReviewer%s.svm" % mode)
def __init__(self):
    if ("model.pkl" in os.listdir()) and ("enc.pkl" in os.listdir()):
        self.model = joblib.load("model.pkl")
        self.enc = joblib.load("enc.pkl")
    else:
        self.refit_from_scratch()
def roc_precision_final(db, fac=1):
    if os.path.exists(MAT_PATH) == False:
        os.mkdir(MAT_PATH)
    random_state = check_random_state(0)
    print("Loading {}...".format(db))
    clf = joblib.load("clfs/" + db)
    classes = clf.classes_
    print("Loading test set...")
    loaded = joblib.load("testSet/" + db)
    y_true = loaded[:, -1]
    print("Predict proba...")
    y_score = clf.predict_proba(loaded[:, 0:-1])
    loaded = 0
    clf = 0
    y_score = y_score[:, classes == 1] * fac
    print("ROC...")
    if fac != 1:
        db = db + str(fac)
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    sio.savemat(MAT_PATH + 'final.roc.' + db + '.mat',
                {'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
    print("Precision/Recall...")
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    sio.savemat(MAT_PATH + 'final.precall.' + db + '.mat',
                {'precision': precision, 'recall': recall, 'thresholds': thresholds})
def load_models(path="models", models={}):
    x = os.listdir(path)
    models = models
    for i in x:
        try:
            if not i.startswith('.') and not i.startswith('_') and os.path.isdir(os.path.join(path, i)):
                way = os.path.join(path, i)
                clf = glob.glob(os.path.join(way, "clf_*.pkl"))
                vec = glob.glob(os.path.join(way, "vectorizer_*.pkl"))
                print(". %s" % (way))
                if len(clf) != 1 or len(vec) != 1:
                    print("└── No model found in '%s'. Skipped." % (i))
                    continue
                t0 = time()
                sys.stdout.flush()
                print("├── Loading classifier '%s'..." % (i))
                sys.stdout.flush()
                if "clf_%s" % (i) not in models:
                    models["clf_%s" % (i)] = joblib.load(clf[0])
                print("├── Done. [%.02fs]" % (time() - t0))
                sys.stdout.flush()
                t0 = time()
                print("├── Loading vectorizer '%s'..." % (i))
                sys.stdout.flush()
                if "vectorizer_%s" % (i) not in models:
                    models["vectorizer_%s" % (i)] = joblib.load(vec[0])
                print("└── Done. [%.02fs]" % (time() - t0))
                sys.stdout.flush()
                t0 = time()
        except:
            print(">> Error on '%s', skipped." % (i))
    return models
def getClassifiers(self):
    if not os.path.exists(self.outDir):
        os.mkdir(self.outDir)
    outDir = self.outDir + os.sep + "classPickle"
    if not os.path.exists(outDir):
        os.mkdir(outDir)
    class1Save = outDir + os.sep + "classifier1.pkl"
    class2Save = outDir + os.sep + "classifier2.pkl"
    class1Exists = os.path.exists(class1Save)
    class2Exists = os.path.exists(class2Save)
    if not (class1Exists and class2Exists):
        self._setupTempDir()
        self.fitsFiles = [f[:-5] for f in os.listdir(self.fitsFolder) if ".fits" in f]
        self.fitsFilesLoc = [os.path.abspath(self.fitsFolder + os.sep + f)
                             for f in os.listdir(self.fitsFolder) if ".fits" in f]
        for f in self.fitsFiles:
            self.mainCatalog[f] = self.getCatalog(self.fitsFolder + os.sep + f + ".fits", ishape=True)
            self.candidateMask[f] = self._getCandidateMask(self.mainCatalog[f],
                                                           np.loadtxt(self.fitsFolder + os.sep + f + ".txt"))
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'WEIGHT', self.candidateMask[f] * 1.0, usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'EXTENDED', self.candidateMask[f], usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'HLR', np.zeros(self.mainCatalog[f].shape), usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'MAG', np.zeros(self.mainCatalog[f].shape), usemask=False)
        self._trainClassifier()
        joblib.dump(self.sc, class1Save)
        joblib.dump(self.sc2, class2Save)
    else:
        self.sc = joblib.load(class1Save)
        self.sc2 = joblib.load(class2Save)
    # self._testClassifier(catalog, candidateMask)
    # self._cleanTempDir()
    self._debug("Classifier generated. Now you can invoke .clasify(catalog)")
def _train(self, train_data, resources):
    sample_length = len(train_data)
    dict_status_path = os.path.join(root_dic,
                                    'dict_vectorizer_{}.status'.format(sample_length))
    if os.path.isfile(dict_status_path):
        dictVectorizer = joblib.load(dict_status_path)
    else:
        dictVectorizer = DictVectorizer()
        dictVectorizer.fit(train_data[self.features].fillna(0).to_dict('record'))
        joblib.dump(dictVectorizer, dict_status_path)
    tfidf_status_path = os.path.join(root_dic,
                                     'tfidf_vectorizer_{}.status'.format(sample_length))
    if os.path.isfile(tfidf_status_path):
        tfidf = joblib.load(tfidf_status_path)
    else:
        tfidf = TfidfVectorizer(min_df=40, max_features=300)
        tfidf.fit(train_data.essay)
        joblib.dump(tfidf, tfidf_status_path)
    resources['dictVectorizer'] = dictVectorizer
    resources['tfidf'] = tfidf
    print 'Head Processing Completed'
    return train_data, resources
def load_model(model_path):
    gen_params_values = joblib.load(model_path + '_gen_params.jl')
    for p, v in izip(gen_params, gen_params_values):
        p.set_value(v)
    discrim_params_values = joblib.load(model_path + '_discrim_params.jl')
    for p, v in izip(discrim_params, discrim_params_values):
        p.set_value(v)
def do_gbdt(train_x, train_y, test_x=None, test_y=None, learning_rate=0.03,
            max_depth=8, max_features=25, n_estimators=600,
            load=False, save=True, outfile=None, search=False):
    if search == False:
        mdl_name = 'gbdt_train_lr' + str(learning_rate) + '_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '.pkl'
        if os.path.exists(mdl_name) == True:
            clf_gbdt = joblib.load(mdl_name)
        else:
            # create gradient boosting
            clf_gbdt = GradientBoostingClassifier(learning_rate=learning_rate,
                                                  max_depth=max_depth,
                                                  max_features=max_features,
                                                  n_estimators=n_estimators)
            # n_estimators=500, learning_rate=0.5, max_depth=3)
            clf_gbdt.fit(train_x, train_y)
            if save == True:
                try:
                    _ = joblib.dump(clf_gbdt, mdl_name, compress=1)
                except:
                    print("*** Save GBM model to pickle failed!!!")
                    if outfile != None:
                        outfile.write("*** Save RF model to pickle failed!!!")
        if test_x != None and test_y != None:
            probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
            score_gbdt = roc_auc_score(test_y, probas_gbdt)
            print("GBDT ROC score", score_gbdt)
        return clf_gbdt
    else:
        max_depth_list = [5, 6, 7]
        n_list = [2000, 3000]
        lr_list = [0.01, 0.005]
        info = {}
        for md in max_depth_list:
            for n in n_list:
                for lr in lr_list:
                    print 'max_depth = ', md
                    print 'n = ', n
                    print 'learning rate = ', lr
                    clf_gbdt = GradientBoostingClassifier(learning_rate=lr,
                                                          max_depth=md,
                                                          max_features=max_features,
                                                          n_estimators=n)
                    # n_estimators=500, learning_rate=0.5, max_depth=3)
                    clf_gbdt.fit(train_x, train_y)
                    probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
                    score_gbdt = roc_auc_score(test_y, probas_gbdt)
                    info[md, n, lr] = score_gbdt
        for md in info:
            scores = info[md]
            print('GBDT max_depth = %d, n = %d, lr = %.5f, ROC score = %.5f(%.5f)'
                  % (md[0], md[1], md[2], scores.mean(), scores.std()))
source_wav_file = sys.argv[1]
gmm_file = sys.argv[2]
converted_wav_file = sys.argv[3]

# Extract the mel-cepstrum and pitch from the source wav file
# Save in ASCII format so numpy can read it easily
print "extract mcep ..."
source_mcep_file = "source.mcep_ascii"
extract_mcep(source_wav_file, source_mcep_file, ascii=True)

print "extract pitch ..."
source_pitch_file = "source.pitch"
extract_pitch(source_wav_file, source_pitch_file)

# Load the GMM
gmm = joblib.load(gmm_file)

# Convert the source mel-cepstrum with the GMM
# Save in binary format so SPTK can synthesize from it
print "convert mcep ..."
converted_mcep_file = "converted.mcep"
convert_mcep(source_mcep_file, converted_mcep_file, gmm)

# Re-synthesize from the source pitch and the converted mel-cepstrum
print "synthesis ..."
synthesis(source_pitch_file, converted_mcep_file, converted_wav_file)

# Remove the temporary files
os.remove(source_mcep_file)
os.remove(source_pitch_file)
os.remove(converted_mcep_file)
def load_model(model):
    return joblib.load(model)
            raise
        # handle Index exceptions
        except IndexError:
            counter = counter + 1
            # print("counts : ", counter)
            # print("error handled")

    # save all the train and test vectors
    joblib.dump(train_data, 'train_data.pkl')
    joblib.dump(test_data, 'test_data.pkl')
    joblib.dump(train_labels, 'train_labels.pkl')
    joblib.dump(test_labels, 'test_labels.pkl')
else:
    # run only if create_vectors_again is unset
    # load all the train and test vectors
    train_data = joblib.load('train_data.pkl')
    test_data = joblib.load('test_data.pkl')
    train_labels = joblib.load('train_labels.pkl')
    test_labels = joblib.load('test_labels.pkl')

# fit the model again if start_training_model is set
if R_SEG.start_training_model == 1:
    clf = R_SEG.trainClassifier(R_SEG.select_classifier, train_data, train_labels)
else:
    # load the fitted model if start_training_model is unset
    clf = joblib.load(R_SEG.select_classifier + '_model.pkl')

# find all the predictions on all test images
predictions = R_SEG.predictImages(clf, test_data, test_labels)

# save all the images
R_SEG.saveSegmentedImage(num_test, image_filenames_test, predictions)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
from random import randint
from matplotlib.lines import Line2D
from sklearn.cluster import KMeans
from scipy.spatial import distance
from sklearn.externals import joblib

clf = joblib.load('Mood_disorder_45')
clf.predict()
'''
data = pd.read_csv('GTEx_v7_brain_subGEM-log-no.txt', sep='\t')
data = data.transpose()
print(data.shape)
data = data.fillna(0)
global_max = data.max().max()
global_min = data.min().min()
print(global_max)
'''
centroid = clf.cluster_centers_
num_clusters = 45
fig = plt.figure()
for i in range(0, num_clusters):
if __name__ == "__main__":
    import argparse
    import sys

    # Initialize parser
    parser = argparse.ArgumentParser()

    # Adding optional argument
    parser.add_argument("-file", "--File", help="Input Test File Path")
    parser.add_argument("-model", "--Model", help="Input Model Path")

    # Read arguments from command line
    args = parser.parse_args()

    if args.File:
        final_test, X = create_test_data(file_path=str(args.File))
    if args.Model:
        model = joblib.load(str(args.Model))
        y_pred_test = model.predict(X)
        classes = {0: 'SAD', 1: 'HAPPY'}
        final_test['MOOD_TAG'] = y_pred_test
        final_test['MOOD_TAG'] = [classes[item] for item in final_test['MOOD_TAG']]
        file_name = "evaluation_classified.csv"
        final_test.to_csv(file_name, index=None, header=True)
files_tr = sorted(glob('../data/train_f*.f'))

# USE_PREF
li = []
for i in files_tr:
    for j in USE_PREF:
        if j in i:
            li.append(i)
            break
files_tr = li

[print(i, f) for i, f in enumerate(files_tr)]

X_train = pd.concat(
    [pd.read_feather(f) for f in tqdm(files_tr, mininterval=30)] +
    [joblib.load('../external/X_train_nejumi.pkl.gz')],
    axis=1)
y_train = utils.load_target()['HasDetections']

# drop
if len(col_drop) > 0:
    X_train.drop(col_drop, axis=1, inplace=True)

if X_train.columns.duplicated().sum() > 0:
    raise Exception(
        f'duplicated!: { X_train.columns[X_train.columns.duplicated()] }')
print('no dup :) ')
print(f'X_train.shape {X_train.shape}')

gc.collect()
def load_model(serialized_model):
    model = joblib.load(serialized_model)
    logging.info("Model loaded from %s", serialized_model)
    return model
# get data from database
def get_data():
    sql_con = MySQLdb.connect(
        host='127.0.0.1',
        port=3306,
        user='******',
        passwd='XXXXXX',
        db='Hackthon2019',
        use_unicode=True,
        charset="utf8"
    )
    sql_cur = sql_con.cursor()
    sql_cur.execute("SELECT using FROM training_data")
    using = sql_cur.fetchall()
    sql_cur.execute("SELECT pnexts FROM training_data")
    pre_nexts = sql_cur.fetchall()
    return (using, pre_nexts)


# train model
training()
model = joblib.load(model_name)
now_using = "photoshop"
result = model.predict(now_using)

# send message to windows computer
notify = Notify()
notify.register()
notify.send("Also open " + str(result) + "?")  # should be picture
def do_RF(train_x, train_y, test_x=None, test_y=None, n_estimators=2000,
          max_depth=20, max_features=20, criterion='entropy', method='isotonic',
          cv=5, min_samples_leaf=1, min_samples_split=13, random_state=4141,
          n_jobs=-1, load=False, save=True, outfile=None, search=False):
    if search == False:
        # mdl_name = 'rf_train_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '_maxfeat' + str(max_features) \
        mdl_name = 'rf_isotonic_train_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '_maxfeat' + str(max_features) \
            + '_minSamLeaf' + str(min_samples_leaf) + '_minSamSplit' + str(min_samples_split) + '.pkl'
        if os.path.exists(mdl_name) == True:
            clf_rf_isotonic = joblib.load(mdl_name)
        else:
            clf_rf = RandomForestClassifier(n_estimators=n_estimators,
                                            max_depth=max_depth,
                                            max_features=max_features,
                                            criterion=criterion,
                                            min_samples_leaf=min_samples_leaf,
                                            min_samples_split=min_samples_split,
                                            random_state=random_state,
                                            n_jobs=n_jobs)
            clf_rf_isotonic = CalibratedClassifierCV(clf_rf, cv=cv, method=method)
            clf_rf_isotonic.fit(train_x, train_y)
            if save == True:
                try:
                    _ = joblib.dump(clf_rf_isotonic, mdl_name, compress=1)
                except:
                    print("*** Save RF model to pickle failed!!!")
                    if outfile != None:
                        outfile.write("*** Save RF model to pickle failed!!!")
        if test_x != None and test_y != None:
            probas_rf = clf_rf_isotonic.predict_proba(test_x)[:, 1]
            score_rf = roc_auc_score(test_y, probas_rf)
            print("RF ROC score", score_rf)
        return clf_rf_isotonic
    else:
        if test_x == None or test_y == None:
            print "Have to provide test_x and test_y to do grid search!"
            return -1
        min_samples_split = [10, 11, 12]
        max_depth_list = [15, 20, 25]
        n_list = [2000]
        max_feat_list = [10, 20, 30]
        info = {}
        for mss in min_samples_split:
            for max_depth in max_depth_list:
                # for n in n_list:
                for max_features in max_feat_list:
                    print 'max_features = ', max_features
                    n = 2000
                    print 'n = ', n
                    print 'min_samples_split = ', mss
                    print 'max_depth = ', max_depth
                    clf_rf = RandomForestClassifier(n_estimators=n,
                                                    max_depth=max_depth,
                                                    max_features=max_features,
                                                    criterion=criterion,
                                                    min_samples_leaf=min_samples_leaf,
                                                    min_samples_split=mss,
                                                    random_state=random_state,
                                                    n_jobs=n_jobs)
                    # clf_rf.fit(train_x, train_y)
                    clf_rf_isotonic = CalibratedClassifierCV(clf_rf, cv=cv, method=method)
                    clf_rf_isotonic.fit(train_x, train_y)
                    probas_rf = clf_rf_isotonic.predict_proba(test_x)[:, 1]
                    scores = roc_auc_score(test_y, probas_rf)
                    info[max_features, mss, max_depth] = scores
        for mss in info:
            scores = info[mss]
            print('clf_rf_isotonic: max_features = %d, min_samples_split = %d, max_depth = %d, ROC score = %.5f(%.5f)'
                  % (mss[0], mss[1], mss[2], scores.mean(), scores.std()))

        "B": {
            "0": 396.9
        },
        "LSTAT": {
            "0": 4.98
        }

    result looks like:
    { "prediction": [ <val> ] }
    """
    # Logging the input payload
    json_payload = request.json
    LOG.info(f"JSON payload: \n{json_payload}")
    inference_payload = pd.DataFrame(json_payload)
    LOG.info(f"Inference payload DataFrame: \n{inference_payload}")
    # scale the input
    scaled_payload = scale(inference_payload)
    # get an output prediction from the pretrained model, clf
    prediction = list(clf.predict(scaled_payload))
    # TO DO: Log the output prediction value
    LOG.info(f"prediction: {prediction}")
    return jsonify({'prediction': prediction})


if __name__ == "__main__":
    # load pretrained model as clf
    clf = joblib.load("./model_data/boston_housing_prediction.joblib")
    app.run(host='0.0.0.0', port=80, debug=True)  # specify port=80
def load_model(filename):
    return joblib.load(filename)
                                         include_top=False)
# in_tensor = base_model.inputs[0]
out_tensor = base_model.outputs[0]
out_tensor = tf.keras.layers.GlobalAveragePooling2D()(out_tensor)

# Define the full model by the endpoints
model = tf.keras.models.Model(inputs=[in_tensor], outputs=[out_tensor])

# Compile the model for execution. Losses and optimizers can be
# anything here, since we don't train the model
model.compile(loss="categorical_crossentropy", optimizer='sgd')

LDA = joblib.load('trained_LDA')

with open("submission_LDA.csv", "w") as fp:
    fp.write("Id,Category\n")

    # Image index
    i = 0

    # 1. load image and resize
    for file in os.listdir("test\\testset"):
        if file.endswith(".jpg"):
            # Load the image
            img = plt.imread("test\\testset\\" + file)

            # Resize it to the net input size:
            img = cv2.resize(img, (224, 224))
import keyboard
import collections
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import svm
from sklearn.externals import joblib
import time
from scipy.ndimage.filters import gaussian_filter
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# svm_model_linear = joblib.load('filtered.pkl')
clf = joblib.load('Quad.pkl')
print("press cntrl to see real time data!")


class MyListener(myo.DeviceListener):

    def __init__(self, queue_size=8):
        self.lock = threading.Lock()
        self.emg_data_queue = collections.deque(maxlen=queue_size)
        self.gyro_data_queue = collections.deque(maxlen=3)
        self.ori_data_queue = collections.deque(maxlen=4)
        self.acc_data_queue = collections.deque(maxlen=3)

    def on_connect(self, device, timestamp, firmware_version):
        device.set_stream_emg(myo.StreamEmg.enabled)

    def on_emg_data(self, device, timestamp, emg_data):
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas as pd
import re
from sklearn.externals import joblib
import pickle

# saved_classifier = joblib.load('saved_classifier.sav')
saved_classifier = joblib.load("class.pkl")
my_cv = joblib.load("my_cv.pkl")

'''import re
import nltk
nltk.download('stopwords')'''
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()

# dataset = pd.read_csv('train.csv')
# train = dataset.iloc[0:20001, 1:]
# y = dataset.iloc[0:len(train), 2:].values

sentences = []
'''for i in range(0, len(train)):
    sent = re.sub(r"i'm", "i am", train['comment_text'][i])
    sent = re.sub(r"he's", "he is", train['comment_text'][i])
    sent = re.sub(r"she's", "she is", train['comment_text'][i])
    sent = re.sub(r"that's", "that is", train['comment_text'][i])
    sent = re.sub(r"what's", "what is", train['comment_text'][i])
    sent = re.sub(r"where's", "where is", train['comment_text'][i])
    sent = re.sub(r"how's", "how is", train['comment_text'][i])
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()
pprint(vars(arguments))

# read parameters from speaker yml
sconf1 = SpeakerYML(arguments.org_yml)
sconf2 = SpeakerYML(arguments.tar_yml)
pconf = PairYML(arguments.pair_yml)

# read GMM for mcep
mcepgmm = GMMConvertor(
    n_mix=pconf.GMM_mcep_n_mix,
    covtype=pconf.GMM_mcep_covtype,
    gmmmode=None,
)
param = joblib.load(arguments.gmm)
mcepgmm.open_from_param(param)

# construct FeatureExtractor class
feat1 = FeatureExtractor(
    analyzer=sconf1.analyzer,
    fs=sconf1.wav_fs,
    fftl=sconf1.wav_fftl,
    shiftms=sconf1.wav_shiftms,
    minf0=sconf1.f0_minf0,
    maxf0=sconf1.f0_maxf0,
)
feat2 = FeatureExtractor(
    analyzer=sconf2.analyzer,
    fs=sconf2.wav_fs,
    fftl=sconf2.wav_fftl,
lrelu = activations.LeakyRectify(leak=0.2)
sigmoid = activations.Sigmoid()

trX, vaX, teX, trY, vaY, teY = pastaBlackWhite()

vaX = floatX(vaX) / 127.5 - 1.
trX = floatX(trX) / 127.5 - 1.
teX = floatX(teX) / 127.5 - 1.

X = T.tensor4()

desc = 'cond_dcgan'
epoch = 5999
params = [
    sharedX(p)
    for p in joblib.load('models/%s/%d_discrim_params.jl' % (desc, epoch))
]
print desc.upper()
print 'epoch %d' % epoch


def mean_and_var(X):
    u = T.mean(X, axis=[0, 2, 3])
    s = T.mean(T.sqr(X - u.dimshuffle('x', 0, 'x', 'x')), axis=[0, 2, 3])
    return u, s


def bnorm_statistics(X, w, w2, g2, b2, w3, g3, b3, wy):
    h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
    h2 = dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))
def loadModel(modelPath):
    log("Start load model: ", modelPath)
    clf = joblib.load(modelPath)
    return clf
def reload(self, filename):
    self.logger.info("reload")
    self.clf = joblib.load(filename)
def predicao():
    ss = StandardScaler()
    ss = StandardScaler()

    # disable warning messages
    pd.options.mode.chained_assignment = None  # default='warn'

    # load the data used to build the model
    df = pd.read_csv('registro_candidatos.csv')

    # get features and corresponding outcomes
    feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade', 'MediaAvaliacao', 'ErrosPraticos']
    training_features = df[feature_names]
    outcome_name = ['Recomenda']
    outcome_labels = df[outcome_name]

    # list features by type
    numeric_feature_names = ['MediaAvaliacao', 'ErrosPraticos']
    categoricial_feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade']

    # fit the scaler on the numeric features
    ss.fit(training_features[numeric_feature_names])

    # scale numeric features now
    training_features[numeric_feature_names] = ss.transform(
        training_features[numeric_feature_names])
    training_features = pd.get_dummies(training_features, columns=categoricial_feature_names)
    # print(training_features)
    categorical_engineered_features = list(
        set(training_features.columns) - set(numeric_feature_names))

    feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade', 'MediaAvaliacao', 'ErrosPraticos']
    numeric_feature_names = ['MediaAvaliacao', 'ErrosPraticos']
    categoricial_feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade']

    model = joblib.load(r'Model/model.pickle')
    scaler = joblib.load(r'Scaler/scaler.pickle')

    ## new data for classification
    root.geometry("500x500")

    w = Label(root, text="Nome do Candidato")
    w.pack()
    nome2 = Entry(root)
    nome2.pack()
    nome2.delete(0, END)
    nome2.insert(0, "")

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Digite a Nota:\n(Excelente - Alta - Boa - Média - Ruim - Péssima)")
    w.pack()
    nota2 = Entry(root)
    nota2.pack()
    nota2.delete(0, END)
    nota2.insert(0, "")

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Tem dinâmica de Grupo? (Sim - Não)")
    w.pack()
    dinamica = Entry(root)
    dinamica.pack()
    dinamica.delete(0, END)
    dinamica.insert(0, "")

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Tem agressividade?\n(Sim - Não)")
    w.pack()
    agressivida = Entry(root)
    agressivida.pack()
    agressivida.delete(0, END)

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Média na avaliação:")
    w.pack()
    media = Entry(root)
    media.pack()
    media.delete(0, END)

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Erros em teste prático:")
    w.pack()
    erros = Entry(root)
    erros.pack()
    erros.delete(0, END)

    w = Label(root, text="")
    w.pack()

    def consul():
        nome = nome2.get()
        nota = nota2.get()
        dinamicadegrupo = dinamica.get()
        agressividade = agressivida.get()
        mediaavaliacao = int(media.get())
        errospraticos = int(erros.get())

        new_data = pd.DataFrame([{
            'Nome': nome,
            'Nota': nota,
            'DinamicadeGrupo': dinamicadegrupo,
            'Agressividade': agressividade,
            'MediaAvaliacao': int(mediaavaliacao),
            'ErrosPraticos': int(errospraticos)
        }])
        new_data = new_data[['Nome', 'Nota', 'DinamicadeGrupo', 'Agressividade',
                             'MediaAvaliacao', 'ErrosPraticos']]

        ## prepare the prediction on the new data
        prediction_features = new_data[feature_names]

        # scaling
        prediction_features[numeric_feature_names] = scaler.transform(
            prediction_features[numeric_feature_names])

        # categorical variables
        prediction_features = pd.get_dummies(prediction_features,
                                             columns=categoricial_feature_names)

        # add any missing categorical feature columns
        current_categorical_engineered_features = set(
            prediction_features.columns) - set(numeric_feature_names)
        missing_features = set(categorical_engineered_features) - current_categorical_engineered_features
        for feature in missing_features:
            # add zeros, since the feature is absent in these data samples
            prediction_features[feature] = [0] * len(prediction_features)

        # prediction using the previously trained model
        predictions = model.predict(prediction_features)

        # results
        new_data['Recomenda'] = predictions
        print(new_data)
        tkMessageBox.showinfo(
            "Consulta Concluída!",
            "O sistema recomenda este candidato:\n *** " +
            str(new_data['Recomenda'][0] + str("***\n")))

    busca = Button(root, text="Consultar", command=lambda: consul())
    busca.pack()
    root.mainloop()
#!C:\Users\sujan\AppData\Local\Programs\Python\Python37\python
print("Content-type: text/html\r\n\n")
# -*- coding: utf-8 -*-

# importing libraries
from sklearn.externals import joblib
import inputScript
import sys

# load the pickle file
classifier = joblib.load('rf_final.pkl')

# input url
url = sys.argv[1]

# checking and predicting
checkprediction = inputScript.main(url)
prediction = classifier.predict(checkprediction)

if prediction == 1:
    print(" THIS IS PHISHING URL")
else:
    print(" THIS IS NOT PHISHING URL")
for plot_num, folder in enumerate(to_consider):
    df = pd.read_csv("../../data/hvac/minutes_%s.csv" % folder)
    df["hvac_class_copy"] = df["hvac_class"].copy()
    df = df[df.dataid.isin(find_common_dataids())]
    df.index = range(len(df))
    if NUM_CLASSES == 2:
        df.hvac_class[(df.hvac_class == "Average") | (df.hvac_class == "Good")] = "Not bad"
        COLUMN_NAMES = ["Bad", "Not bad"]
    else:
        COLUMN_NAMES = ["Average", "Bad", "Good"]
    np.random.seed(0)
    clf = joblib.load(
        os.path.expanduser("~/git/nilm-actionable/data/hvac/rf_hvac.pkl"))
    true_labels = df['hvac_class'].values
    pred_labels = clf.predict(df[list(f)])
    numeric_cols = f
    df[numeric_cols] = df[numeric_cols].div(df[numeric_cols].max())
    accur = accuracy_multiclass(true_labels, pred_labels)
    print folder
    print accur
    print pd.value_counts(pred_labels)
    confusion_df = pd.DataFrame(confusion_matrix(true_labels, pred_labels),
                                index=["Feedback", "No Feedback"],
                                columns=["Feedback", "No Feedback"])
    sns.heatmap(confusion_df, annot=True, fmt="d", linewidths=.5, ax=ax)
    # ax.set_title(return_name(folder)[0])
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterRis', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # create visuals
    # TODO: Below is an example - modify to create your own visuals
    catg_nam = df.iloc[:, 4:].columns
def load_model(filename="./model_data.pkl"):
    model_data = joblib.load(filename)
    return model_data["classifier"], model_data["scaler"]
def load_model_and_scaler(from_dir):
    with open(f"{from_dir}/model.json", "r") as f:
        simpmodel = model_from_json(f.read())
    simpmodel.load_weights(f"{from_dir}/model.h5")
    scaler = joblib.load(f"{from_dir}/scaler.save")
    return simpmodel, scaler
def get_samples(foldername, filter=None):
    samples = []
    for file in os.listdir(foldername):
        if filter and file.find(filter) == -1:
            continue
        for sample in sample_file(foldername + '/' + file).get_samples():
            samples.append(sample)
    return samples


if __name__ == '__main__':
    arguments = docopt.docopt(__doc__)
    filters = {'dancing': 0, 'walking': 1, 'sitting': 2}
    if arguments['--model']:
        clf = joblib.load(arguments['--model'])
    else:
        training = dataset('../datasets/training', filters)
        svr = svm.SVC()
        exponential_range = [pow(10, i) for i in range(-4, 1)]
        parameters = {
            'kernel': ['linear', 'rbf'],
            'C': exponential_range,
            'gamma': exponential_range
        }
        clf = grid_search.GridSearchCV(svr, parameters, n_jobs=8, verbose=True)
        clf.fit(training.data, training.target)
        joblib.dump(clf, '../models/1s_6sps.pkl')
    print clf
    return preds


if __name__ == '__main__':
    '''
    call like
        python model_predict.py IBM 5DayWindowBestLongBuyPrice
        python model_predict.py GSPC 5DayWindowBestLongBuyPrice IBM
    '''
    ticker = sys.argv[1]
    target = sys.argv[2]
    target_ticker = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] != '>' else ticker

    # Target Feature Set
    test_features_file = config.GetTestingFeaturesFileName(ticker)
    predictions_file = config.GetPredictionsFileName(ticker, target, target_ticker)
    model_file = config.GetModelFileName(ticker, target, target_ticker)

    # reconstitute trained model
    print "Loading model..."
    model = joblib.load(model_file)

    print "Loading data..."
    test_features = numpy.load(test_features_file)

    print "Making predictions..."
    predictions = predict(model, test_features)

    print "Saving predictions to file..."
    numpy.save(predictions_file, predictions)
DB.create_tables([Prediction], safe=True)

# End database stuff
########################################

########################################
# Unpickle the previously-trained model

with open('columns.json') as fh:
    columns = json.load(fh)

pipeline = joblib.load('pipeline.pickle')

with open('dtypes.pickle', 'rb') as fh:
    dtypes = pickle.load(fh)

# End model un-pickling
########################################

########################################
# Begin webserver stuff

app = Flask(__name__)
            yield (x, y, img[y:y + windowsize[1], x:x + windowsize[0]])


def hogs(img):
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    bins = np.int32(bin_n * ang / (2 * np.pi))  # quantizing binvalues in (0...16)
    bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:]
    mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:]
    hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
    hist = np.hstack(hists)
    return hist  # hist is a 64 bit vector


path = os.getcwd()
Classifier = joblib.load('linear_2.pkl')

fi = path + "/svm/json/label_test.json"
with open(fi, 'r') as files:
    label_test = np.array(json.load(files))

fi = path + "/svm/json/feature_test.json"
with open(fi, 'r') as files:
    feature_test = np.array(json.load(files))

print "predicting.."
predict = Classifier.predict(feature_test)
print "Expected output:", label_test
print "Predicted output:", predict
print "Confusion Matrix:\n", metrics.confusion_matrix(label_test, predict)
print "Fowlkes Mallows Score", fowlkes_mallows_score(label_test, predict)
    spct += 1

print "Training Completed"

confusion_matrix = np.zeros((total_sp, total_sp))
tct = 0
for speaker in speakers:
    if tct <= 0:
        tct = len(glob.glob('test_wavdata/' + speaker + '/*.wav'))
    for testcasefile in glob.glob('test_wavdata/' + speaker + '/*.wav'):
        [Fs, x] = audioBasicIO.readAudioFile(testcasefile)
        features = extract_MFCCs(x, Fs, window * Fs, window_overlap * Fs,
                                 voiced_threshold_mul, voiced_threshold_range,
                                 calc_deltas)
        max_score = -9999999
        max_speaker = speaker
        for modelfile in sorted(glob.glob('train_models/*.pkl')):
            gmm = joblib.load(modelfile)
            score = gmm.score(features)
            if score > max_score:
                max_score, max_speaker = score, modelfile.replace('train_models/', '').replace('.pkl', '')
        print speaker + " -> " + max_speaker + (" Y" if speaker == max_speaker else " N")
        confusion_matrix[speakers[speaker]][speakers[max_speaker]] += 1

print "Accuracy: ", (sum([
    confusion_matrix[i][j] if i == j else 0
    for i in xrange(total_sp)
    for j in xrange(total_sp)
]) * 100) / float(tct * total_sp)