def recognize (train_file,test_file):
    print ("1.Parsing sets")
    datalist = loadtxt(open(train_file, 'r'), dtype='f8', delimiter=',', skiprows=1)
    joblib.dump(datalist, 'training_set.pkl')
    datalist = joblib.load('training_set.pkl')
    label = datalist[:, 0]
    train = datalist[:, 1:]
    test = loadtxt(open(test_file, 'r'), dtype='f8', delimiter=',', skiprows=1)
    joblib.dump(test, 'test_set.pkl')
    test = joblib.load('test_set.pkl')

    print ("2.Create and train RF")
    temp = RandomForestClassifier(n_estimators=100, n_jobs=4)
    cv = cross_validation.KFold(len(train), n_folds=5, indices=True)

    scores = []
    
    for train_indices, test_indices in cv:
        print('Train: %s | test: %s' % (len(train_indices), len(test_indices)))
        # Index with the fold arrays directly; slicing from the first to the
        # last training index would silently include the held-out block for
        # the middle folds.
        trainfit, labelfit = train[train_indices], label[train_indices]
        traintest, labeltest = train[test_indices], label[test_indices]
        scores.append(temp.fit(trainfit, labelfit).score(traintest, labeltest))

    print ("Accuracy: " + str(np.array(scores).mean()))
Example #2
def cities_output():
    user = request.args.get('ID')
    user = re.match('@?(.*)', user).groups()[0]
    cur = db.cursor()
    cur.execute("SELECT TWEET FROM tweets_by_user WHERE HANDLE='%s';" % user)
    query_results = np.array(cur.fetchall())
    if len(query_results) == 0:
        try:    
            stuff = api.user_timeline(screen_name = user, count = 8000, include_rts = False)
        
            for status in stuff:
                tweet = status.text
                screen_name = user
                cur.execute("INSERT INTO tweets_by_user (HANDLE, TWEET) VALUES (%s,%s)", (screen_name, tweet))
            db.commit()
            cur.execute("SELECT TWEET FROM tweets_by_user WHERE HANDLE=%s;", (user,))
            query_results = np.array(cur.fetchall())
        except Exception:
            return render_template('not_exist.html', user = user)
    tweets = [t[0] for t in query_results]
    vectorizer = joblib.load('/home/jtsitr/twitter_project/vectorizer.pkl')
    clf = joblib.load('/home/jtsitr/twitter_project/clf.pkl')
    tweets = vectorizer.transform(tweets)
    prediction = clf.predict(tweets)
    if tweets.shape[0]<100:
        return render_template('not_enough_tweets.html', user=user)
    else:
        the_result = np.mean(prediction)
        try:
            return render_template("output_final.html", the_result = the_result, user = user)
        except Exception as e:
            return render_template('500.html', error = str(e))
Example #3
def fetch_vgg_architecture(caffemodel_parsed=None, caffemodel_protobuffer=None):
    """Fetch a pickled version of the caffe model, represented as list of
    dictionaries."""

    default_filename = os.path.join(VGG_PATH, 'vgg.pickle')
    if caffemodel_parsed is not None:
        if os.path.exists(caffemodel_parsed):
            return joblib.load(caffemodel_parsed)
        else:
            if os.path.exists(default_filename):
                import warnings
                warnings.warn('Did not find %s, but found %s. Loading it.' %
                              (caffemodel_parsed, default_filename))
                return joblib.load(default_filename)
    else:
        if os.path.exists(default_filename):
            return joblib.load(default_filename)

    # We didn't find the file: let's create it by parsing the protobuffer
    protobuf_file = fetch_vgg_protobuffer_file(caffemodel_protobuffer)
    model = _parse_caffe_model(protobuf_file)

    if caffemodel_parsed is not None:
        joblib.dump(model, caffemodel_parsed)
    else:
        joblib.dump(model, default_filename)

    return model
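A minimal usage sketch for the helper above (assumes VGG_PATH is configured and the fetch/parse helpers are importable from the same module):

# Load the cached pickle, or parse the protobuffer once and cache the result
layers = fetch_vgg_architecture()
print('parsed %d layer dictionaries' % len(layers))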
Example #4
    def predict(self, img_path):
        img, positions, pix_data, captcha_type = self.read_img(img_path)
        print positions, captcha_type
        if positions is None:
            print('Image segmentation error!')
            return None
        x = np.array(self.get_pix_list(pix_data, positions, captcha_type))
        if captcha_type == 'number':
            if self.model is None or os.path.isfile(self.number_model_file):
                self.model = joblib.load(self.number_model_file)
            else:
                raise IOError
        elif self.model is None or os.path.isfile(self.symbol_model_file):
            self.model = joblib.load(self.symbol_model_file)
        else:
            raise IOError
        predict_label = list()
        for i in range(x.shape[0]):
            input = x[i, :]
            predict_y = self.model.predict(input)[0]
            if int(predict_y) >= len(self.number_label_list) or int(predict_y) < 0:
                return "", ""
            if captcha_type == 'number':
                predict_label.append(self.number_label_list[predict_y])
            else:
                predict_label.append(self.symbol_label_list[predict_y])

        return u"".join(predict_label), self.__caculate(predict_label, captcha_type)
Example #5
 def load_model(self, path):
     self.clf = joblib.load(os.path.join(path, 'model.pkl'))
     with open(os.path.join(path, 'labels.json'), 'r') as fo:
         self.labels = Alphabet.from_dict(json.load(fo))
     with open(os.path.join(path, 'model_info.json'), 'r') as fo:
         self.model_info = json.load(fo)            
     self.features = joblib.load(os.path.join(path, 'featvec.pkl'))
 def __init__(self):
     self.pca = joblib.load("result/pca_model.m")
     self.scaler = joblib.load("result/scale_model.m")
     with open("result/A_con.pkl", "rb") as f:
         self.A = pickle.load(f)
     with open("result/G_con.pkl", "rb") as f:
         self.G = pickle.load(f)
Example #7
def selectFeatures(X, t=0):
    if t == 0:
        selector = joblib.load('selector.pkl')
    else:
        selector = joblib.load('SelectKBest.pkl')
    X_new = selector.transform(X)
    return X_new
def predict(filein_name):
    """Predict.

    """
    filein_name = '0908-12.txt'  # note: this overrides the argument passed in
    # get models
    from sklearn.externals import joblib
    LR010 = joblib.load('0903_uid_ave_010.pkl') 
    LR001 = joblib.load('0903_uid_ave_001.pkl') 
    LR100 = joblib.load('0903_uid_ave_100.pkl') 

    import scipy.io as sio

    X = sio.loadmat('uid_dict_X001-12.mat')['X']
    y_predict_prob = LR001.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y001.mat', {'y':y_predict_prob})

    X = sio.loadmat('uid_dict_X010-12.mat')['X']
    y_predict_prob = LR010.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y010.mat', {'y':y_predict_prob})

    X = sio.loadmat('uid_dict_X100-12.mat')['X']
    y_predict_prob = LR100.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y100.mat', {'y':y_predict_prob})
Example #9
def train_pipeline(kind, cut, vectorizer, model_trainer, do_cut=False, do_vectorizer=False, record_num=None):
    print('reading...')
    alltext, accu_label, law_label, time_label = data.read_trainData("./data/data_train.json", record_num)

    if do_cut:
        print('cutting...')
        train_text = cut.cut(alltext)
        joblib.dump(train_text, './data/{}_cut_train.txt'.format(cut.name))

        print('cleaning...')
        cleaner = Cleaner()
        cleaned_train_text = cleaner.clean(train_text)
        joblib.dump(cleaned_train_text, './data/{}_cut_train_cleaned.txt'.format(cut.name))
    else:
        print('load existing cut file {}...'.format('./data/{}_cut_train_cleaned.txt'.format(cut.name)))
        cleaned_train_text = joblib.load('./data/{}_cut_train_cleaned.txt'.format(cut.name))

    vectorizer_name = '{}_{}'.format(cut.name, vectorizer.name)
    if do_vectorizer:
        print('{} training...'.format(vectorizer_name))
        vectorizer = vectorizer.train(cleaned_train_text)
        joblib.dump(vectorizer,
                    './model/{}/predictor/model/{}_vectorizer.model'.format(model_trainer.name, vectorizer_name))
        print('{} vectorizing...'.format(vectorizer_name))
        vec = vectorizer.transform(cleaned_train_text)
        joblib.dump(vec, './data/vec_{}.txt'.format(vectorizer_name))
    else:
        print('load existing vec file {}...'.format('./data/vec_{}.txt'.format(vectorizer_name)))
        vec = joblib.load('./data/vec_{}.txt'.format(vectorizer_name))

    print('{} training...'.format(kind))
    model = model_trainer.train(vec, accu_label)
    joblib.dump(model, './model/{}/predictor/model/{}_{}.model'.format(model_trainer.name, vectorizer_name, kind))
Example #10
def CV_trainModel():

	# Data preprocessing
	label_has = joblib.load('data/label_has.pkl')
	traindata_has = joblib.load('data/data_has.pkl')

	label_no = joblib.load('data/label_no.pkl')
	traindata_no = joblib.load('data/data_no.pkl')

	traindata = np.vstack([traindata_has,traindata_no])
	labels = np.hstack([label_has,label_no])

	# print traindata.dtype
	# print labels.dtype

	traindata = np.float32(traindata)
	labels = np.int32(labels)

	model = SVM(C=1.0, gamma=1.0)

	model.train(traindata,labels)
	model.save('model/svm.dat')
	#model.load('model/svm.dat')

	return model
def train_classifier():
    pos_feat_path = positive_features_path
    neg_feat_path = negative_features_path

    model_path = classifier_model_path

    feature_vectors = []
    labels = []

    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(1)

    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(0)

    classifier = LinearSVC()
    print "Training classifier"
    classifier.fit(feature_vectors, labels)
    print "Classifier successfully trained"
    if not os.path.isdir(os.path.split(model_path)[0]):
        os.makedirs(os.path.split(model_path)[0])
    joblib.dump(classifier, model_path)
def init_api(app, es_util):
    # Load the .pkl model artifacts
    model = joblib.load(r'app\rules\modelspkl\knowledge_cart.pkl')
    tf_transformer = joblib.load(r'app\rules\modelspkl\knowledge_tf_transformer.pkl')
    le = joblib.load(r'app\rules\modelspkl\knowledge_labelencoder.pkl')

    @app.route('/ml_error_predict_batch', methods=['POST'])
    def error_predict():
        parmStr = request.get_data()
        paramDict = json.loads(parmStr)
        testline = paramDict['index']


        """
        获取用户信息
        :return: json
        """
        auth = Auth()
        result = auth.identify(request)

        if (result['status']):
            test_datas = readDataLine(testline)
            if test_datas != None:
                test_feature_datas = tf_transformer.transform(test_datas)

                # run the prediction
                pred = model.predict(test_feature_datas)
                result = le.inverse_transform(pred)
                print('Prediction result: %s, input text: %s' % (result[0], testline))
                #logging.debug('Prediction result: %s, input text: %s' % (result[0], testline))
                return result[0]
            return "this is None"
        else:
            return jsonify(result)
Example #13
def trainModel():

	# Data preprocessing
	data_train = joblib.load('data/data_train.pkl')
	label_train = joblib.load('data/label_train.pkl')

	print data_train.shape

	clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.1, degree=0.1, gamma=1.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=True)

	#clf.set_params(kernel='rbf')

	print clf

	print data_train.shape
	print label_train.shape

	print 'begin training....'
	clf.fit(data_train,label_train)
	print 'finish training....'
	print clf
	joblib.dump(clf, 'model/svm.pkl')
	
	return None
Example #14
 def varianceInProductGroups(df):
     nonlocal state
     print("Making: varianceInProductGroups")
     if state == 1 and os.path.exists('pickleFiles/colorStd.pkl') and os.path.exists('pickleFiles/sizeStd.pkl'):
         sizeStd = joblib.load('pickleFiles/sizeStd.pkl')
         colorStd = joblib.load('pickleFiles/colorStd.pkl')
     elif state == 0 and os.path.exists('pickleFiles/colorStd_test.pkl') and os.path.exists('pickleFiles/sizeStd_test.pkl'):
         sizeStd = joblib.load('pickleFiles/sizeStd_test.pkl')
         colorStd = joblib.load('pickleFiles/colorStd_test.pkl')
     else:
         products = df.groupby('productGroup')
         sizeStd, colorStd = {},{}
         for idx,product in products:
             if idx not in sizeStd or idx not in colorStd:
                 size = np.std(list(Counter(product['sizeCode']).values()))
                 color = np.std(list(Counter(product['colorCode']).values()))
                 sizeStd[idx] = size
                 colorStd[idx] = color
         if state == 1:
             joblib.dump(sizeStd,'pickleFiles/sizeStd.pkl')
             joblib.dump(colorStd,'pickleFiles/colorStd.pkl')
         else:
             joblib.dump(sizeStd,'pickleFiles/sizeStd_test.pkl')
             joblib.dump(colorStd,'pickleFiles/colorStd_test.pkl')
     df['sizeStd'] = df['productGroup'].map(sizeStd)
     df['colorStd'] = df['productGroup'].map(colorStd)
     return df
Example #15
File: misc.py Project: kedz/cuttsum
def event2semsim(event):
    import os
    from sklearn.externals import joblib
    if isinstance(event, str):
        etype = event
    else:
        etype = event.type
    if etype == "accident":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"),
            "semsim", "accidents.norm-stem.lam20.000.pkl"))
    elif etype == "earthquake" or etype == "storm" or etype == "impact event":
        return joblib.load(
            os.path.join(
                os.getenv("TREC_DATA"), 
                "semsim", "natural-disasters.norm-stem.lam20.000.pkl"))
    elif etype == "protest" or etype == "riot":
        return joblib.load(
            os.path.join(
                os.getenv("TREC_DATA"), 
                "semsim", "social-unrest.norm-stem.lam1.000.pkl"))
    elif etype == "shooting" or etype == "bombing" or etype == "conflict" or \
            etype == "hostage":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"),
            "semsim", "terrorism.norm-stem.lam10.000.pkl"))
Example #16
 def averageColor(df):
     nonlocal state
     print("Making: averageColor")
     if state == 1 and os.path.exists('pickleFiles/averageColor.pkl'):
         averageColor = joblib.load('pickleFiles/averageColor.pkl')
     elif state == 0 and os.path.exists('pickleFiles/averageColor_test.pkl'):
         averageColor = joblib.load('pickleFiles/averageColor_test.pkl')
     else:
         allColor = {} #find all the colours that customers buy
         for i in df.index:
             currCustomer = df['customerID'][i]
             if currCustomer not in allColor:
                 allColor[currCustomer] = [df['colorCode'][i]]
             else:
                 allColor[currCustomer].append(df['colorCode'][i])
         averageColor = {}
         for entry in allColor:
             if entry not in averageColor:
                 averageColor[entry] = np.mean(allColor[entry])
         if state == 1:
             joblib.dump(averageColor,'pickleFiles/averageColor.pkl')
         else:
             joblib.dump(averageColor,'pickleFiles/averageColor_test.pkl')
     avgcolor = pd.Series(name= 'averageColor', index=df.index)
     for i in df.index:
         customer = df['customerID'][i]
         avgcolor.set_value(i,averageColor[customer])
     df['averageColor'] = avgcolor
     return df
Example #17
    def cheapskateItems(df):
        nonlocal state
        print("Making: cheapskateItems")
        if state == 1 and os.path.exists('pickleFiles/voucherToArticle.pkl'):
            voucherDic = joblib.load('pickleFiles/voucherToArticle.pkl')
        elif state == 0 and os.path.exists('pickleFiles/voucherToArticle_test.pkl'):
            voucherDic = joblib.load('pickleFiles/voucherToArticle_test.pkl')
        else:
            voucherDic = {}
            vouchers = df.groupby('voucherID')
            for idx,voucher in vouchers:
                if idx not in voucherDic:
                    voucherDic[idx] = Counter(voucher['articleID']).most_common()[0][0]
            if state == 1:
                joblib.dump(voucherDic,'pickleFiles/voucherToArticle.pkl')
            else:
                joblib.dump(voucherDic,'pickleFiles/voucherToArticle_test.pkl')

        articleSet = set(voucherDic.values())
        cheapArticle = pd.Series(name='cheapArticle',index=df.index)
        for i in df.index:
            article = df['articleID'][i]
            isCheap = 1 if article in articleSet else 0
            cheapArticle.set_value(i,isCheap)
        df['cheapArticle'] = cheapArticle
        return df
Example #18
 def colorPopularity(df):
     print('Making: colorPopularity')
     nonlocal state
     if state == True and os.path.exists('pickleFiles/colorMap.pkl'):
         colorMap = joblib.load('pickleFiles/colorMap.pkl')
     elif state==False and os.path.exists('pickleFiles/colorMap_test.pkl'):
         colorMap = joblib.load('pickleFiles/colorMap_test.pkl')
     else:
         colorCount = Counter(df['colorCode'])
         popularColors = [i[0] for i in colorCount.most_common(5)]
         shittyColors = [j[0] for j in colorCount.most_common()[::-1] if j[1] < 5]
         colorMap = {}
         for color in df['colorCode']:
             if color not in colorMap:
                 if color in popularColors:
                     colorMap[color] = "popular"
                 elif color in shittyColors:
                     colorMap[color] = "unpopular"
                 else:
                     colorMap[color] = "neutral"
         if state == True:
             joblib.dump(colorMap,'pickleFiles/colorMap.pkl')
         else:
             joblib.dump(colorMap,'pickleFiles/colorMap_test.pkl')
     df['colorPopularity'] = df['colorCode'].map(colorMap)
     return df
Example #19
    def modeSize(df):
        nonlocal state
        print('Making: mostFrequentSize and differenceSize')
        if state == 1 and os.path.exists('pickleFiles/modeSizesBought.pkl'):
            modeSizeData = joblib.load('pickleFiles/modeSizesBought.pkl')
        elif state == 0 and os.path.exists('pickleFiles/modeSizesBought_test.pkl'):
            modeSizeData = joblib.load('pickleFiles/modeSizesBought_test.pkl')
        else:
            allSize = {}
            for i in df.index: #find all sizes purchased by customers
                currCust = df['customerID'][i]
                if currCust not in allSize:
                    allSize[currCust] = [df['sizeCode'][i]]
                else:
                    allSize[currCust].append(df['sizeCode'][i])
            modeSize = {}
            for customer in allSize:
                if customer not in modeSize:
                    mode = Counter(allSize[customer]).most_common(1)[0][0]
                    modeSize[customer] = mode

            if state == 1:
                joblib.dump(modeSize,'pickleFiles/modeSizesBought.pkl')
            else:
                joblib.dump(modeSize,'pickleFiles/modeSizesBought_test.pkl')
            modeSizeData = modeSize

        mostFrequentSize = pd.Series(name= 'mostFrequentSize', index=df.index)
        for i in df.index:
            customer = df['customerID'][i]
            mostFrequentSize.set_value(i,modeSizeData[customer])
        df['modeSize'] = mostFrequentSize
        df['differenceModeSize'] = abs(mostFrequentSize - df['sizeCode'])
        return df
def callback(ch, method, properties, body):
    print(" [x] Received %r" % (body,))
    answer = dict()
    message = str(body)
    features = calculate_features(message[2:-2])
    features = np.array(features).reshape(1, -1)
    #scaler = StandardScaler()
    #features = scaler.fit_transform(features)

    file = 'Resources/AGE_model.pkl'
    age_model = joblib.load(file)

    file = 'Resources/GENDER_model.pkl'
    sex_model = joblib.load(file)

    answer['age'] = [age_model.predict(features)]
    age = answer['age'][0][0:1][0]

    age = str(age).replace("\r", "")
    age = str(age).replace("\n", "")
    answer['gender'] = [sex_model.predict(features)]
    gender = answer['gender'][0][0:1][0]

    print('Age: ' + str(age) + ', gender: ' + str(gender))
    print(" [x] Done")
    ch.basic_ack(delivery_tag=method.delivery_tag)
Example #21
def predict_category_subcategory(book_name):
	data_set1 = pandas.Series(book_name.encode('ascii'))

    #Data Preprocessing
	data_set1 = data_set1.dropna(axis=0,how='any')
	data_set1 = data_set1.str.lower()

    #Manual removal List
	remove_list = ['edition','ed','edn', 'vol' , 'vol.' , '-' ,'i']


	data_set1[0] =' '.join([i for i in data_set1[0].split() if i not in remove_list])

	data_set1 = data_set1.apply(lambda x :re.sub(r'\w*\d\w*', '', x).strip())
	data_set1 = data_set1.apply(lambda x :re.sub(r'\([^)]*\)', ' ', x))
	data_set1 = data_set1.apply(lambda x :re.sub('[^A-Za-z0-9]+', ' ', x))
    #data_set['Category ID'] = data_set['Category ID']+"|"+data_set['Subcategory ID']


    #Stemming the book titles
	stemmer = LancasterStemmer()
	data_set1[0]=" ".join([stemmer.stem(i) for i in  data_set1[0].split()])

	clf = joblib.load(os.path.join(BASE_DIR+"/learners/",'category_predict.pkl'))
	ans = clf.predict(data_set1)
	sub_clf = joblib.load(os.path.join(BASE_DIR+"/learners/",'subcategory_predict.pkl'))
	sub_ans = sub_clf.predict(data_set1)
	return [ans[0],sub_ans[0]]
Example #22
    def put(self):
        startgeocode_json = (request.form['startgeocode'])
        endgeocode_json = (request.form['endgeocode'])
        tripdistance_json = (request.form['tripdistance'])
        import json
        startgeocode = json.loads(startgeocode_json)
        endgeocode = json.loads(endgeocode_json)
        startlat = float(startgeocode['lat'])
        startlng = float(startgeocode['lng'])
        endlat = float(endgeocode['lat'])
        endlng = float(endgeocode['lng'])
        tripdist = float(tripdistance_json.split(" ")[0])
        hour = int(request.form['hour'])
        dayofweek = int(request.form['dayofweek'])
        lowspeedclf = joblib.load(os.path.join(APP_STATIC, 'costtime.pkl'))
        lowspeedx = [startlat,startlng,endlat,endlng,hour,dayofweek,tripdist]
        lowspeedy = int(lowspeedclf.predict(lowspeedx)[0])
        tripdurationclf = joblib.load(os.path.join(APP_STATIC, 'trip_duration.pkl'))
        tripduration_x = lowspeedx
        trip_duration_y = tripdurationclf.predict(tripduration_x)[0]

        duration_list = []
        lowspeed_list = []
        for i in range(24):
            x = [startlat,startlng,endlat,endlng,i,dayofweek,tripdist]
            duration_list.append([i,int(tripdurationclf.predict(x)[0])])
            lowspeed_list.append([i,int(lowspeedclf.predict(x)[0])])

        return {"lowspeedtime":lowspeedy, "tripduration":trip_duration_y, "duration_list":duration_list, "lowspeed_list":lowspeed_list}
Example #23
def train_and_single_label(train_filename, test_filename, clf, pickled):
    """ Only return one example ID for each q_id
    """
    if pickled:
        train_data = joblib.load(train_filename)
        test_data = joblib.load(test_filename)
    else:
        train_data = extract_ibm_data(train_filename)
        test_data = extract_ibm_data(test_filename, test_file=True)

    X = train_data["data"]
    y = train_data["target"]
    clf.fit(X, y)

    labels = clf.predict(test_data["data"])
    # now manipulate the results using test_data['q_id'] to filter the labels
    ##NEW CODE:
    used_qids = []
    results = []
    for i in range(len(labels)):
        if labels[i] == "true":
            if not test_data["q_id"][i] in used_qids:
                results.append(test_data["id"][i])
                used_qids.append(test_data["q_id"][i])
    return results
Example #24
def loadModule(mode):
    global movieReviewer
    try:
        movieReviewer = joblib.load("./SVM/movieReviewer%s.svm" % mode)
    except:
        import SVMTrain
        movieReviewer = joblib.load("./SVM/movieReviewer%s.svm" % mode)
Example #25
    def __init__(self):
        if ("model.pkl" in os.listdir()) and ("enc.pkl" in os.listdir()):
            self.model = joblib.load("model.pkl")
            self.enc = joblib.load("enc.pkl")

        else:
            self.refit_from_scratch()
Example #26
def roc_precision_final(db, fac=1):
    if (os.path.exists(MAT_PATH) == False):
        os.mkdir(MAT_PATH)
        
    random_state = check_random_state(0)
    
    print("Loading {}...".format(db))
    clf = joblib.load("clfs/" + db)
        
    classes = clf.classes_
    
    print("Loading test set...")
    loaded = joblib.load("testSet/" + db)
    y_true = loaded[:, -1]

    
    print("Predict proba...")
    y_score = clf.predict_proba(loaded[:, 0:-1])
    loaded = 0
    clf = 0
    y_score = y_score[:, classes == 1] * fac
    
    print("ROC...")
    if (fac != 1):
        db = db + str(fac)
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    sio.savemat(MAT_PATH + 'final.roc.' + db + '.mat', {'fpr':fpr, 'tpr':tpr, 'thresholds':thresholds})
    
    print("Precision/Recall...")
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    sio.savemat(MAT_PATH + 'final.precall.' + db + '.mat', {'precision':precision, 'recall':recall, 'thresholds':thresholds})
Example #27
def load_models(path="models",models={}):
    x = os.listdir(path)
    models = models
    for i in x:
        try:
            if not i.startswith('.') and not i.startswith('_') and os.path.isdir(os.path.join(path, i)):
                way = os.path.join(path, i)
                clf = glob.glob(os.path.join(way,"clf_*.pkl"))
                vec = glob.glob(os.path.join(way,"vectorizer_*.pkl"))
                print(". %s"%(way))
                if len(clf)!=1 or len(vec)!=1:
                    print("└── No model found in '%s'. Skipped."%(i))
                    continue
                t0=time()
                sys.stdout.flush()
                print("├── Loading classifier '%s'..."%(i))
                sys.stdout.flush()
                if "clf_%s"%(i) not in models:
                    models["clf_%s"%(i)] = joblib.load(clf[0])
                    print("├── Done. [%.02fs]"%(time()-t0))
                    sys.stdout.flush()
                t0=time()
                print("├── Loading vectorizer '%s'..."%(i))
                sys.stdout.flush()
                if "vectorizer_%s"%(i) not in models:
                    models["vectorizer_%s"%(i)] = joblib.load(vec[0])
                    print("└── Done. [%.02fs]"%(time()-t0))
                    sys.stdout.flush()
                t0=time()
        except Exception:
            print(">> Error on '%s', skipped."%(i))
    return models
Example #28
    def getClassifiers(self):
        if not os.path.exists(self.outDir):
            os.mkdir(self.outDir)
        outDir = self.outDir + os.sep + "classPickle"
        if not os.path.exists(outDir):
            os.mkdir(outDir)
        class1Save = outDir + os.sep + "classifier1.pkl"
        class2Save = outDir + os.sep + "classifier2.pkl"
        
        class1Exists = os.path.exists(class1Save)
        class2Exists = os.path.exists(class2Save)

        if not (class1Exists and class2Exists):
            self._setupTempDir()
            self.fitsFiles = [f[:-5] for f in os.listdir(self.fitsFolder) if ".fits" in f]
            self.fitsFilesLoc = [os.path.abspath(self.fitsFolder + os.sep + f) for f in os.listdir(self.fitsFolder) if ".fits" in f]
            
            for f in self.fitsFiles:
                self.mainCatalog[f] = self.getCatalog(self.fitsFolder + os.sep + f + ".fits", ishape=True)
                self.candidateMask[f] = self._getCandidateMask(self.mainCatalog[f], np.loadtxt(self.fitsFolder + os.sep + f + ".txt"))
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'WEIGHT', self.candidateMask[f] * 1.0, usemask=False)    
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'EXTENDED', self.candidateMask[f], usemask=False)    
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'HLR', np.zeros(self.mainCatalog[f].shape), usemask=False)    
                self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'MAG', np.zeros(self.mainCatalog[f].shape), usemask=False)
            self._trainClassifier()
            joblib.dump(self.sc, class1Save) 
            joblib.dump(self.sc2, class2Save) 
        else:
            self.sc = joblib.load(class1Save)
            self.sc2 = joblib.load(class2Save)
            

        #self._testClassifier(catalog, candidateMask)
        #self._cleanTempDir()
        self._debug("Classifier generated. Now you can invoke .clasify(catalog)")
Example #29
    def _train(self, train_data, resources):
        sample_length = len(train_data)
        dict_status_path = os.path.join(root_dic,
                                        'dict_vectorizer_{}.status'.
                                        format(sample_length))
        if os.path.isfile(dict_status_path):
            dictVectorizer = joblib.load(dict_status_path)
        else:
            dictVectorizer = DictVectorizer()
            dictVectorizer.fit(train_data[self.features].
                               fillna(0).
                               to_dict('record'))
            joblib.dump(dictVectorizer, dict_status_path)

        tfidf_status_path = os.path.join(root_dic,
                                         'tfidf_vectorizer_{}.status'.
                                         format(sample_length))
        if os.path.isfile(tfidf_status_path):
            tfidf = joblib.load(tfidf_status_path)
        else:
            tfidf = TfidfVectorizer(min_df=40, max_features=300)
            tfidf.fit(train_data.essay)
            joblib.dump(tfidf, tfidf_status_path)

        resources['dictVectorizer'] = dictVectorizer
        resources['tfidf'] = tfidf
        print 'Head Processing Completed'
        return train_data, resources
Example #30
def load_model(model_path):
  gen_params_values = joblib.load(model_path+'_gen_params.jl')
  for p, v in izip(gen_params, gen_params_values):
      p.set_value(v)
  discrim_params_values = joblib.load(model_path+'_discrim_params.jl')
  for p, v in izip(discrim_params, discrim_params_values):
      p.set_value(v)
Example #31
def do_gbdt(train_x,
            train_y,
            test_x=None,
            test_y=None,
            learning_rate=0.03,
            max_depth=8,
            max_features=25,
            n_estimators=600,
            load=False,
            save=True,
            outfile=None,
            search=False):
    if search == False:
        mdl_name = 'gbdt_train_lr' + str(learning_rate) + '_n' + str(
            n_estimators) + '_maxdep' + str(max_depth) + '.pkl'
        if os.path.exists(mdl_name) == True:
            clf_gbdt = joblib.load(mdl_name)
        else:
            # create gradient boosting
            clf_gbdt = GradientBoostingClassifier(learning_rate=learning_rate,
                                                  max_depth=max_depth,
                                                  max_features=max_features,
                                                  n_estimators=n_estimators)
            #n_estimators=500, learning_rate=0.5, max_depth=3)
            clf_gbdt.fit(train_x, train_y)
            if save == True:
                try:
                    _ = joblib.dump(clf_gbdt, mdl_name, compress=1)
                except:
                    print("*** Save GBM model to pickle failed!!!")
                    if outfile is not None:
                        outfile.write("*** Save GBM model to pickle failed!!!")
        if test_x is not None and test_y is not None:
            probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
            score_gbdt = roc_auc_score(test_y, probas_gbdt)
            print("GBDT ROC score", score_gbdt)
        return clf_gbdt
    else:
        max_depth_list = [5, 6, 7]
        n_list = [2000, 3000]
        lr_list = [0.01, 0.005]
        info = {}
        for md in max_depth_list:
            for n in n_list:
                for lr in lr_list:
                    print 'max_depth = ', md
                    print 'n = ', n
                    print 'learning rate = ', lr
                    # use the grid values here (the original passed the fixed
                    # learning_rate and n_estimators arguments by mistake)
                    clf_gbdt = GradientBoostingClassifier(
                        learning_rate=lr,
                        max_depth=md,
                        max_features=max_features,
                        n_estimators=n)
                    # n_estimators=500, learning_rate=0.5, max_depth=3)
                    clf_gbdt.fit(train_x, train_y)
                    probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
                    score_gbdt = roc_auc_score(test_y, probas_gbdt)
                    info[md, n, lr] = score_gbdt
        for md in info:
            scores = info[md]
            print(
                'GBDT max_depth = %d, n = %d, lr = %.5f, ROC score = %.5f(%.5f)'
                % (md[0], md[1], md[2], scores.mean(), scores.std()))
Example #32
    source_wav_file = sys.argv[1]
    gmm_file = sys.argv[2]
    converted_wav_file = sys.argv[3]

    # extract mel-cepstrum and pitch from the source wav file
    # save in ASCII format so it is easy to read with numpy
    print "extract mcep ..."
    source_mcep_file = "source.mcep_ascii"
    extract_mcep(source_wav_file, source_mcep_file, ascii=True)

    print "extract pitch ..."
    source_pitch_file = "source.pitch"
    extract_pitch(source_wav_file, source_pitch_file)

    # load the GMM
    gmm = joblib.load(gmm_file)

    # convert the source mel-cepstrum with the GMM
    # save in binary format so SPTK can synthesize from it
    print "convert mcep ..."
    converted_mcep_file = "converted.mcep"
    convert_mcep(source_mcep_file, converted_mcep_file, gmm)

    # resynthesize from the source pitch and the converted mel-cepstrum
    print "synthesis ..."
    synthesis(source_pitch_file, converted_mcep_file, converted_wav_file)

    # remove the temporary files
    os.remove(source_mcep_file)
    os.remove(source_pitch_file)
    os.remove(converted_mcep_file)
Example #33
def load_model(model):
    return joblib.load(model)
Example #34
				raise
			# handle Index exceptions
			except IndexError:
				counter = counter + 1
				# print("counts : ",counter)
				# print("error handled")

		# save all the train and test vectors
		joblib.dump(train_data, 'train_data.pkl') 
		joblib.dump(test_data, 'test_data.pkl') 
		joblib.dump(train_labels, 'train_labels.pkl') 
		joblib.dump(test_labels, 'test_labels.pkl') 
	else:
		# run only if create_vectors_again is unset
		# load all the train and test vectors
		train_data = joblib.load('train_data.pkl')
		test_data = joblib.load('test_data.pkl')
		train_labels = joblib.load('train_labels.pkl')
		test_labels = joblib.load('test_labels.pkl')

	# fit the model again if start_training_model is set
	if R_SEG.start_training_model == 1:
		clf = R_SEG.trainClassifier( R_SEG.select_classifier, train_data, train_labels )
	else:
		# load the fitted model if start_training_model is unset
		clf = joblib.load( R_SEG.select_classifier + '_model.pkl' )
	# find all the predictions on all test images
	predictions = R_SEG.predictImages(clf, test_data, test_labels)
	# save all the images
	R_SEG.saveSegmentedImage( num_test, image_filenames_test, predictions )
Example #35
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
from random import randint
from matplotlib.lines import Line2D
from sklearn.cluster import KMeans
from scipy.spatial import distance
from sklearn.externals import joblib


clf = joblib.load('Mood_disorder_45')
clf.predict()
'''
data = pd.read_csv('GTEx_v7_brain_subGEM-log-no.txt',sep='\t')
data = data.transpose()
print(data.shape)

data = data.fillna(0)

global_max = data.max().max()
global_min = data.min().min()

print(global_max)
'''
centroid = clf.cluster_centers_

num_clusters = 45

fig = plt.figure()
for i in range(0, num_clusters):
    pass  # loop body truncated in the source snippet

if __name__ == "__main__":

    import argparse
    import sys

    # Initialize parser
    parser = argparse.ArgumentParser()

    # Adding optional argument
    parser.add_argument("-file", "--File", help="Input Test File Path")
    parser.add_argument("-model", "--Model", help="Input Model Path")

    # Read arguments from command line
    args = parser.parse_args()

    if args.File:
        final_test, X = create_test_data(file_path=str(args.File))

    if args.Model:
        model = joblib.load(str(args.Model))
        y_pred_test = model.predict(X)
        classes = {0: 'SAD', 1: 'HAPPY'}
        final_test['MOOD_TAG'] = y_pred_test
        final_test['MOOD_TAG'] = [
            classes[item] for item in final_test['MOOD_TAG']
        ]
        file_name = "evaluation_classified.csv"
        final_test.to_csv(file_name, index=None, header=True)
files_tr = sorted(glob('../data/train_f*.f'))

# USE_PREF
li = []
for i in files_tr:
    for j in USE_PREF:
        if j in i:
            li.append(i)
            break
files_tr = li

[print(i, f) for i, f in enumerate(files_tr)]

X_train = pd.concat(
    [pd.read_feather(f) for f in tqdm(files_tr, mininterval=30)] +
    [joblib.load('../external/X_train_nejumi.pkl.gz')],
    axis=1)

y_train = utils.load_target()['HasDetections']

# drop
if len(col_drop) > 0:
    X_train.drop(col_drop, axis=1, inplace=True)

if X_train.columns.duplicated().sum() > 0:
    raise Exception(
        f'duplicated!: { X_train.columns[X_train.columns.duplicated()] }')
print('no dup :) ')
print(f'X_train.shape {X_train.shape}')

gc.collect()
Example #38
def load_model(serialized_model):
    model = joblib.load(serialized_model)
    logging.info("Model loaded from %s", serialized_model)
    return model
Example #39
# get data from database
def get_data():
	sql_con=MySQLdb.connect(
	    host='127.0.0.1',
	    port= 3306,
	    user='******',
	    passwd='XXXXXX',
	    db='Hackthon2019',
	    use_unicode=True,
	    charset="utf8"
	)
	sql_cur=sql_con.cursor()
	sql_cur.execute("SELECT using FROM training_data")
	using=sql_con.commit()
	sql_cur.execute("SELECT pnexts FROM training_data")
	pre_nexts=sql_con.commit()

	return (using, pre_nexts)

# train model
training()

model = joblib.load(model_name)

now_using = "photoshop"
result = model.predict(now_using)

# send message to windows computer
notify = Notify()
notify.register()
notify.send("Also open "+str(result)+"?") # should be picture
Example #40
def do_RF(train_x,
          train_y,
          test_x=None,
          test_y=None,
          n_estimators=2000,
          max_depth=20,
          max_features=20,
          criterion='entropy',
          method='isotonic',
          cv=5,
          min_samples_leaf=1,
          min_samples_split=13,
          random_state=4141,
          n_jobs=-1,
          load=False,
          save=True,
          outfile=None,
          search=False):
    if search == False:
        #mdl_name = 'rf_train_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '_maxfeat' + str(max_features) \
        mdl_name = 'rf_isotonic_train_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '_maxfeat' + str(max_features) \
                   + '_minSamLeaf' + str(min_samples_leaf) + '_minSamSplit' + str(min_samples_split) + '.pkl'
        if os.path.exists(mdl_name) == True:
            clf_rf_isotonic = joblib.load(mdl_name)
        else:
            clf_rf = RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                max_features=max_features,
                criterion=criterion,
                min_samples_leaf=min_samples_leaf,
                min_samples_split=min_samples_split,
                random_state=random_state,
                n_jobs=n_jobs)
            clf_rf_isotonic = CalibratedClassifierCV(clf_rf,
                                                     cv=cv,
                                                     method=method)
            clf_rf_isotonic.fit(train_x, train_y)
            if save == True:
                try:
                    _ = joblib.dump(clf_rf_isotonic, mdl_name, compress=1)
                except:
                    print("*** Save RF model to pickle failed!!!")
                    if outfile is not None:
                        outfile.write("*** Save RF model to pickle failed!!!")
        if test_x is not None and test_y is not None:
            probas_rf = clf_rf_isotonic.predict_proba(test_x)[:, 1]
            score_rf = roc_auc_score(test_y, probas_rf)
            print("RF ROC score", score_rf)
        return clf_rf_isotonic
    else:
        if test_x is None or test_y is None:
            print "Have to provide test_x and test_y to do grid search!"
            return -1

        min_samples_split = [10, 11, 12]
        max_depth_list = [15, 20, 25]
        n_list = [2000]
        max_feat_list = [10, 20, 30]
        info = {}
        for mss in min_samples_split:
            for max_depth in max_depth_list:
                #for n in n_list:
                for max_features in max_feat_list:
                    print 'max_features = ', max_features
                    n = 2000
                    print 'n = ', n
                    print 'min_samples_split = ', mss
                    print 'max_depth = ', max_depth
                    clf_rf = RandomForestClassifier(
                        n_estimators=n,
                        max_depth=max_depth,
                        max_features=max_features,
                        criterion=criterion,
                        min_samples_leaf=min_samples_leaf,
                        min_samples_split=mss,
                        random_state=random_state,
                        n_jobs=n_jobs)
                    #clf_rf.fit(train_x, train_y)
                    clf_rf_isotonic = CalibratedClassifierCV(clf_rf,
                                                             cv=cv,
                                                             method=method)
                    clf_rf_isotonic.fit(train_x, train_y)
                    probas_rf = clf_rf_isotonic.predict_proba(test_x)[:, 1]
                    scores = roc_auc_score(test_y, probas_rf)
                    info[max_features, mss, max_depth] = scores
        for mss in info:
            scores = info[mss]
            print(
                'clf_rf_isotonic: max_features = %d, min_samples_split = %d, max_depth = %d, ROC score = %.5f(%.5f)'
                % (mss[0], mss[1], mss[2], scores.mean(), scores.std()))
Example #41
        "B":{
        "0":396.9
        },
        "LSTAT":{
        "0":4.98
        }
        
        result looks like:
        { "prediction": [ <val> ] }
        
        """

    # Logging the input payload
    json_payload = request.json
    LOG.info(f"JSON payload: \n{json_payload}")
    inference_payload = pd.DataFrame(json_payload)
    LOG.info(f"Inference payload DataFrame: \n{inference_payload}")
    # scale the input
    scaled_payload = scale(inference_payload)
    # get an output prediction from the pretrained model, clf
    prediction = list(clf.predict(scaled_payload))
    # TO DO:  Log the output prediction value
    LOG.info(f"prediction: {prediction}")
    return jsonify({'prediction': prediction})


if __name__ == "__main__":
    # load pretrained model as clf
    clf = joblib.load("./model_data/boston_housing_prediction.joblib")
    app.run(host='0.0.0.0', port=80, debug=True)  # specify port=80
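A client-side sketch for the service above; the route path is not visible in this fragment, so '/predict' is an assumption:

import requests

payload = {"B": {"0": 396.9}, "LSTAT": {"0": 4.98}}  # shape follows the docstring above
# '/predict' is assumed here; substitute the actual @app.route path
resp = requests.post("http://localhost:80/predict", json=payload)
print(resp.json())  # expected shape: {"prediction": [<val>]}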
Example #42
def load_model(filename):
    return joblib.load(filename)
    include_top=False)

#
in_tensor = base_model.inputs[0]
out_tensor = base_model.outputs[0]

out_tensor = tf.keras.layers.GlobalAveragePooling2D()(out_tensor)

# Define the full model by the endpoints
model = tf.keras.models.Model(inputs=[in_tensor], outputs=[out_tensor])

# Compile the model for execution. Losses and optimizers can be
# anything here, since we don't train the model
model.compile(loss="categorical_crossentropy", optimizer='sgd')

LDA = joblib.load('trained_LDA')

with open("submission_LDA.csv", "w") as fp:
    fp.write("Id,Category\n")

    # Image index
    i = 0
    # 1. load image and resize
    for file in os.listdir("test\\testset"):
        if file.endswith(".jpg"):
            # Load the image
            img = plt.imread("test\\testset\\" + file)

            # Resize it to the net input size:
            img = cv2.resize(img, (224, 224))
Example #44
import keyboard
import collections
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import svm
from sklearn.externals import joblib
import time
from scipy.ndimage.filters import gaussian_filter

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

#svm_model_linear = joblib.load('filtered.pkl')
clf = joblib.load('Quad.pkl')
print("press cntrl to see real time data!")


class MyListener(myo.DeviceListener):
    def __init__(self, queue_size=8):
        self.lock = threading.Lock()
        self.emg_data_queue = collections.deque(maxlen=queue_size)
        self.gyro_data_queue = collections.deque(maxlen=3)
        self.ori_data_queue = collections.deque(maxlen=4)
        self.acc_data_queue = collections.deque(maxlen=3)

    def on_connect(self, device, timestamp, firmware_version):
        device.set_stream_emg(myo.StreamEmg.enabled)

    def on_emg_data(self, device, timestamp, emg_data):
Example #45
#import numpy as np
#import matplotlib.pyplot as plt
#import pandas as pd
import re

from sklearn.externals import joblib
import pickle
#saved_classifier = joblib.load('saved_classifier.sav')

saved_classifier = joblib.load("class.pkl")
my_cv = joblib.load("my_cv.pkl")
'''import re
import nltk
nltk.download('stopwords')'''
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

#dataset = pd.read_csv('train.csv')
#train = dataset.iloc[0:20001, 1:]
#y = dataset.iloc[0:len(train), 2:].values

sentences = []
'''for i in range(0,len(train)):
    sent = re.sub(r"i'm", "i am", train['comment_text'][i])
    sent = re.sub(r"he's", "he is", train['comment_text'][i])
    sent = re.sub(r"she's", "she is", train['comment_text'][i])
    sent = re.sub(r"that's", "that is", train['comment_text'][i])
    sent = re.sub(r"what's", "what is", train['comment_text'][i])
    sent = re.sub(r"where's", "where is", train['comment_text'][i])
    sent = re.sub(r"how's", "how is", train['comment_text'][i])
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()
pprint(vars(arguments))

# read parameters from speaker yml
sconf1 = SpeakerYML(arguments.org_yml)
sconf2 = SpeakerYML(arguments.tar_yml)
pconf = PairYML(arguments.pair_yml)

# read GMM for mcep
mcepgmm = GMMConvertor(
    n_mix=pconf.GMM_mcep_n_mix,
    covtype=pconf.GMM_mcep_covtype,
    gmmmode=None,
)
param = joblib.load(arguments.gmm)
mcepgmm.open_from_param(param)

# construct FeatureExtractor class
feat1 = FeatureExtractor(
    analyzer=sconf1.analyzer,
    fs=sconf1.wav_fs,
    fftl=sconf1.wav_fftl,
    shiftms=sconf1.wav_shiftms,
    minf0=sconf1.f0_minf0,
    maxf0=sconf1.f0_maxf0,
)
feat2 = FeatureExtractor(
    analyzer=sconf2.analyzer,
    fs=sconf2.wav_fs,
    fftl=sconf2.wav_fftl,
lrelu = activations.LeakyRectify(leak=0.2)
sigmoid = activations.Sigmoid()

trX, vaX, teX, trY, vaY, teY = pastaBlackWhite()

vaX = floatX(vaX) / 127.5 - 1.
trX = floatX(trX) / 127.5 - 1.
teX = floatX(teX) / 127.5 - 1.

X = T.tensor4()

desc = 'cond_dcgan'
epoch = 5999
params = [
    sharedX(p)
    for p in joblib.load('models/%s/%d_discrim_params.jl' % (desc, epoch))
]
print desc.upper()
print 'epoch %d' % epoch


def mean_and_var(X):
    u = T.mean(X, axis=[0, 2, 3])
    s = T.mean(T.sqr(X - u.dimshuffle('x', 0, 'x', 'x')), axis=[0, 2, 3])
    return u, s


def bnorm_statistics(X, w, w2, g2, b2, w3, g3, b3, wy):
    h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))

    h2 = dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))
Example #48
def loadModel(modelPath):

    log("Start load model: ", modelPath)
    clf = joblib.load(modelPath)
    return clf
 def reload(self, filename):
     self.logger.info("reload")
     self.clf = joblib.load(filename)
Example #50
def predicao():
    ss = StandardScaler()

    ss = StandardScaler()
    # disable warning messages
    pd.options.mode.chained_assignment = None  # default='warn'

    # read the data used to build the model
    df = pd.read_csv('registro_candidatos.csv')

    # get the features and the corresponding outcomes
    feature_names = [
        'Nota', 'DinamicadeGrupo', 'Agressividade', 'MediaAvaliacao',
        'ErrosPraticos'
    ]

    training_features = df[feature_names]

    outcome_name = ['Recomenda']
    outcome_labels = df[outcome_name]

    # list the features by type
    numeric_feature_names = ['MediaAvaliacao', 'ErrosPraticos']
    categoricial_feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade']

    # fit the scaler on the numeric features
    ss.fit(training_features[numeric_feature_names])

    # scale numeric features now
    training_features[numeric_feature_names] = ss.transform(
        training_features[numeric_feature_names])

    training_features = pd.get_dummies(training_features,
                                       columns=categoricial_feature_names)

    # print(training_features)

    categorical_engineered_features = list(
        set(training_features.columns) - set(numeric_feature_names))

    feature_names = [
        'Nota', 'DinamicadeGrupo', 'Agressividade', 'MediaAvaliacao',
        'ErrosPraticos'
    ]
    numeric_feature_names = ['MediaAvaliacao', 'ErrosPraticos']
    categoricial_feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade']

    model = joblib.load(r'Model/model.pickle')
    scaler = joblib.load(r'Scaler/scaler.pickle')

    ## new data for classification
    root.geometry("500x500")

    w = Label(root, text="Nome do Candidato")
    w.pack()
    nome2 = Entry(root)
    nome2.pack()
    nome2.delete(0, END)
    nome2.insert(0, "")

    w = Label(root, text="")
    w.pack()

    w = Label(
        root,
        text="Digite a Nota:\n(Excelente - Alta - Boa - Média - Ruim - Péssima)"
    )
    w.pack()
    nota2 = Entry(root)
    nota2.pack()
    nota2.delete(0, END)
    nota2.insert(0, "")

    w = Label(root, text="")
    w.pack()

    w = Label(root, text="Tem dinâmica de Grupo? (Sim - Não)")
    w.pack()
    dinamica = Entry(root)
    dinamica.pack()
    dinamica.delete(0, END)
    dinamica.insert(0, "")

    w = Label(root, text="")
    w.pack()

    w = Label(root, text="Tem agressividade? (Sim - Não)")
    w.pack()
    agressivida = Entry(root)
    agressivida.pack()
    agressivida.delete(0, END)

    w = Label(root, text="")
    w.pack()

    w = Label(root, text="Média na avaliação:")
    w.pack()
    media = Entry(root)
    media.pack()
    media.delete(0, END)

    w = Label(root, text="")
    w.pack()

    w = Label(root, text="Erros em teste prático:")
    w.pack()
    erros = Entry(root)
    erros.pack()
    erros.delete(0, END)

    w = Label(root, text="")
    w.pack()

    def consul():
        nome = nome2.get()
        nota = nota2.get()
        dinamicadegrupo = dinamica.get()
        agressividade = agressivida.get()
        mediaavaliacao = int(media.get())
        errospraticos = int(erros.get())

        new_data = pd.DataFrame([{
            'Nome': nome,
            'Nota': nota,
            'DinamicadeGrupo': dinamicadegrupo,
            'Agressividade': agressividade,
            'MediaAvaliacao': int(mediaavaliacao),
            'ErrosPraticos': int(errospraticos)
        }])
        new_data = new_data[[
            'Nome', 'Nota', 'DinamicadeGrupo', 'Agressividade',
            'MediaAvaliacao', 'ErrosPraticos'
        ]]

        ## preparing the prediction with the new data
        prediction_features = new_data[feature_names]

        # scaling
        prediction_features[numeric_feature_names] = scaler.transform(
            prediction_features[numeric_feature_names])

        # categorical variables
        prediction_features = pd.get_dummies(
            prediction_features, columns=categoricial_feature_names)

        # add any missing categorical feature columns

        current_categorical_engineered_features = set(
            prediction_features.columns) - set(numeric_feature_names)
        missing_features = set(categorical_engineered_features
                               ) - current_categorical_engineered_features
        for feature in missing_features:
            # add zeros, since the feature is absent in these data samples
            prediction_features[feature] = [0] * len(prediction_features)

        # prediction using the previously trained model
        predictions = model.predict(prediction_features)

        # Results
        new_data['Recomenda'] = predictions

        print(new_data)
        tkMessageBox.showinfo(
            "Consulta Concluída!",
            "O sistema recomenda este candidato:\n *** " +
            str(new_data['Recomenda'][0] + str("***\n")))

    busca = Button(root, text="Consultar", command=lambda: consul())
    busca.pack()
    root.mainloop()
#!C:\Users\sujan\AppData\Local\Programs\Python\Python37\python
print("Content-type: text/html\r\n\n")
# -*- coding: utf-8 -*-

#importing libraries
from sklearn.externals import joblib
import inputScript
import sys

#load the pickle file
classifier = joblib.load('rf_final.pkl')

#input url

url = sys.argv[1]
#checking and predicting
checkprediction = inputScript.main(url)
prediction = classifier.predict(checkprediction)
if prediction == 1:
    print(" THIS IS PHISHING URL")
else:
    print(" THIS IS NOT PHISHING URL")
Example #52
for plot_num, folder in enumerate(to_consider):
    df = pd.read_csv("../../data/hvac/minutes_%s.csv" % folder)
    df["hvac_class_copy"] = df["hvac_class"].copy()
    df = df[df.dataid.isin(find_common_dataids())]
    df.index = range(len(df))

    if NUM_CLASSES == 2:
        df.hvac_class[(df.hvac_class == "Average") |
                      (df.hvac_class == "Good")] = "Not bad"
        COLUMN_NAMES = ["Bad", "Not bad"]
    else:
        COLUMN_NAMES = ["Average", "Bad", "Good"]

    np.random.seed(0)
    clf = joblib.load(
        os.path.expanduser("~/git/nilm-actionable/data/hvac/rf_hvac.pkl"))
    true_labels = df['hvac_class'].values
    pred_labels = clf.predict(df[list(f)])

    numeric_cols = f
    df[numeric_cols] = df[numeric_cols].div(df[numeric_cols].max())
    accur = accuracy_multiclass(true_labels, pred_labels)

    print folder
    print accur
    print pd.value_counts(pred_labels)
    confusion_df = pd.DataFrame(confusion_matrix(true_labels, pred_labels),
                                index=["Feedback", "No Feedback"],
                                columns=["Feedback", "No Feedback"])
    sns.heatmap(confusion_df, annot=True, fmt="d", linewidths=.5, ax=ax)
    #ax.set_title(return_name(folder)[0])
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterRis', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():

    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # create visuals
    # TODO: Below is an example - modify to create your own visuals
    catg_nam = df.iloc[:, 4:].columns
Example #54
def load_model(filename="./model_data.pkl"):
    model_data = joblib.load(filename)
    return model_data["classifier"], model_data["scaler"]
Example #55
def load_model_and_scaler(from_dir):
    with open(f"{from_dir}/model.json", "r") as f:
        simpmodel = model_from_json(f.read())
    simpmodel.load_weights(f"{from_dir}/model.h5")
    scaler = joblib.load(f"{from_dir}/scaler.save")
    return simpmodel, scaler
Example #56
def get_samples(foldername, filter=None):
    samples = []
    for file in os.listdir(foldername):
        if filter and file.find(filter) == -1:
            continue
        for sample in sample_file(foldername + '/' + file).get_samples():
            samples.append(sample)

    return samples


if __name__ == '__main__':
    arguments = docopt.docopt(__doc__)
    filters = {'dancing': 0, 'walking': 1, 'sitting': 2}
    if arguments['--model']:
        clf = joblib.load(arguments['--model'])
    else:
        training = dataset('../datasets/training', filters)

        svr = svm.SVC()
        exponential_range = [pow(10, i) for i in range(-4, 1)]
        parameters = {
            'kernel': ['linear', 'rbf'],
            'C': exponential_range,
            'gamma': exponential_range
        }
        clf = grid_search.GridSearchCV(svr, parameters, n_jobs=8, verbose=True)
        clf.fit(training.data, training.target)
        joblib.dump(clf, '../models/1s_6sps.pkl')
        print clf
Example #57
    return preds

if __name__ == '__main__':
    '''
        call like 
        python model_predict.py IBM 5DayWindowBestLongBuyPrice
        python model_predict.py GSPC 5DayWindowBestLongBuyPrice IBM
    '''
    
    ticker = sys.argv[1]
    target = sys.argv[2]
    target_ticker = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] != '>' else ticker # Target Feature Set
    
    test_features_file = config.GetTestingFeaturesFileName(ticker)
    predictions_file = config.GetPredictionsFileName(ticker, target, target_ticker)
    model_file = config.GetModelFileName(ticker, target, target_ticker)
       
    # reconstitute trained model
    print "Loading model..."
    model = joblib.load(model_file)
    
    print "Loading data..."
    test_features = numpy.load(test_features_file)
    
    print "Making predictions..."
    predictions = predict(model, test_features)
    
    print "Saving predictions to file..."
    numpy.save(predictions_file, predictions)
            
Example #58

DB.create_tables([Prediction], safe=True)


# End database stuff
########################################

########################################
# Unpickle the previously-trained model


with open('columns.json') as fh:
    columns = json.load(fh)

pipeline = joblib.load('pipeline.pickle')

with open('dtypes.pickle', 'rb') as fh:
    dtypes = pickle.load(fh)


# End model un-pickling
########################################


########################################
# Begin webserver stuff

app = Flask(__name__)

			yield (x, y, img[y:y + windowsize[1], x:x + windowsize[0]])

def hogs(img):
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    bins = np.int32(bin_n*ang/(2*np.pi))    # quantizing binvalues in (0...16)
    bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]
    mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
    hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
    hist = np.hstack(hists)
    return hist     # hist is a 64 bit vector


path =os.getcwd()
Classifier = joblib.load('linear_2.pkl')
fi = path +"/svm/json/label_test.json"
with open(fi,'r') as files:
    label_test = np.array(json.load(files))
fi = path +"/svm/json/feature_test.json"
with open(fi,'r') as files:
    feature_test = np.array(json.load(files))

print "predicting.."
predict = Classifier.predict(feature_test)
print "Expected output:",label_test
print "Predicted output:",predict
print "Confusion Matrix:\n",metrics.confusion_matrix(label_test,predict)
print "Fowlkes Mallows Score",fowlkes_mallows_score(label_test,predict)

        spct += 1

    print "Training Completed"

    confusion_matrix = np.zeros((total_sp, total_sp))
    tct = 0
    for speaker in speakers:
        if tct <= 0:
            tct = len(glob.glob('test_wavdata/' + speaker + '/*.wav'))
        for testcasefile in glob.glob('test_wavdata/' + speaker + '/*.wav'):
            [Fs, x] = audioBasicIO.readAudioFile(testcasefile)
            features = extract_MFCCs(x, Fs, window * Fs, window_overlap * Fs,
                                     voiced_threshold_mul,
                                     voiced_threshold_range, calc_deltas)
            max_score = -9999999
            max_speaker = speaker
            for modelfile in sorted(glob.glob('train_models/*.pkl')):
                gmm = joblib.load(modelfile)
                score = gmm.score(features)
                if score > max_score:
                    max_score, max_speaker = score, modelfile.replace(
                        'train_models/', '').replace('.pkl', '')
            print speaker + " -> " + max_speaker + (" Y" if speaker
                                                    == max_speaker else " N")
            confusion_matrix[speakers[speaker]][speakers[max_speaker]] += 1

    print "Accuracy: ", (sum([
        confusion_matrix[i][j] if i == j else 0 for i in xrange(total_sp)
        for j in xrange(total_sp)
    ]) * 100) / float(tct * total_sp)