def recognize(train_file, test_file):
    print("1.Parsing sets")
    datalist = loadtxt(open(train_file, 'r'), dtype='f8', delimiter=',', skiprows=1)
    joblib.dump(datalist, 'training_set.pkl')
    datalist = joblib.load('training_set.pkl')
    label = [x[0] for x in datalist]
    train = [x[1:] for x in datalist]
    test = loadtxt(open(test_file, 'r'), dtype='f8', delimiter=',', skiprows=1)
    joblib.dump(test, 'test_set.pkl')
    test = joblib.load('test_set.pkl')
    print("2.Create and train RF")
    temp = RandomForestClassifier(n_estimators=100, n_jobs=4)
    cv = cross_validation.KFold(len(train), n_folds=5, indices=True)
    scores = []
    for train_indices, test_indices in cv:
        print('Train: %s | test: %s' % (len(train_indices), len(test_indices)))
        trainfit = train[train_indices[0]:train_indices[-1] + 1]
        traintest = train[test_indices[0]:test_indices[-1] + 1]
        labelfit = label[train_indices[0]:train_indices[-1] + 1]
        labeltest = label[test_indices[0]:test_indices[-1] + 1]
        scores.append(temp.fit(trainfit, labelfit).score(traintest, labeltest))
    print("Accuracy: " + str(np.array(scores).mean()))
def cities_output():
    user = request.args.get('ID')
    user = re.match('@?(.*)', user).groups()[0]
    cur = db.cursor()
    cur.execute("SELECT TWEET FROM tweets_by_user WHERE HANDLE='%s';" % user)
    query_results = np.array(cur.fetchall())
    if len(query_results) == 0:
        try:
            stuff = api.user_timeline(screen_name=user, count=8000, include_rts=False)
            for status in stuff:
                tweet = status.text
                screen_name = user
                cur.execute("INSERT INTO tweets_by_user (HANDLE, TWEET) VALUES (%s,%s)",
                            (screen_name, tweet))
                db.commit()
            cur.execute("SELECT TWEET FROM tweets_by_user WHERE HANDLE='%s';" % user)
            query_results = np.array(cur.fetchall())
        except:
            return render_template('not_exist.html', user=user)
    tweets = [t[0] for t in query_results]
    vectorizer = joblib.load('/home/jtsitr/twitter_project/vectorizer.pkl')
    clf = joblib.load('/home/jtsitr/twitter_project/clf.pkl')
    tweets = vectorizer.transform(tweets)
    prediction = clf.predict(tweets)
    if tweets.shape[0] < 100:
        return render_template('not_enough_tweets.html', user=user)
    else:
        the_result = np.mean(prediction)
        try:
            return render_template("output_final.html", the_result=the_result, user=user)
        except Exception as e:
            return render_template('500.html', error=str(e))
def fetch_vgg_architecture(caffemodel_parsed=None, caffemodel_protobuffer=None):
    """Fetch a pickled version of the caffe model, represented as list of dictionaries."""
    default_filename = os.path.join(VGG_PATH, 'vgg.pickle')
    if caffemodel_parsed is not None:
        if os.path.exists(caffemodel_parsed):
            return joblib.load(caffemodel_parsed)
        else:
            if os.path.exists(default_filename):
                import warnings
                warnings.warn('Did not find %s, but found %s. Loading it.' %
                              (caffemodel_parsed, default_filename))
                return joblib.load(default_filename)
    else:
        if os.path.exists(default_filename):
            return joblib.load(default_filename)

    # We didn't find the file: let's create it by parsing the protobuffer
    protobuf_file = fetch_vgg_protobuffer_file(caffemodel_protobuffer)
    model = _parse_caffe_model(protobuf_file)

    if caffemodel_parsed is not None:
        joblib.dump(model, caffemodel_parsed)
    else:
        joblib.dump(model, default_filename)

    return model
def predict(self, img_path):
    img, positions, pix_data, captcha_type = self.read_img(img_path)
    print positions, captcha_type
    if positions is None:
        print('Image segmentation failed!')
        return None
    x = np.array(self.get_pix_list(pix_data, positions, captcha_type))
    if captcha_type == 'number':
        if self.model is None or os.path.isfile(self.number_model_file):
            self.model = joblib.load(self.number_model_file)
        else:
            raise IOError
    elif self.model is None or os.path.isfile(self.symbol_model_file):
        self.model = joblib.load(self.symbol_model_file)
    else:
        raise IOError
    predict_label = list()
    for i in range(x.shape[0]):
        input = x[i, :]
        predict_y = self.model.predict(input)[0]
        if int(predict_y) >= len(self.number_label_list) or int(predict_y) < 0:
            return "", ""
        if captcha_type == 'number':
            predict_label.append(self.number_label_list[predict_y])
        else:
            predict_label.append(self.symbol_label_list[predict_y])
    return u"".join(predict_label), self.__caculate(predict_label, captcha_type)
def load_model(self, path):
    self.clf = joblib.load(os.path.join(path, 'model.pkl'))
    with open(os.path.join(path, 'labels.json'), 'r') as fo:
        self.labels = Alphabet.from_dict(json.load(fo))
    with open(os.path.join(path, 'model_info.json'), 'r') as fo:
        self.model_info = json.load(fo)
    self.features = joblib.load(os.path.join(path, 'featvec.pkl'))
def __init__(self):
    self.pca = joblib.load("result/pca_model.m")
    self.scaler = joblib.load("result/scale_model.m")
    with open("result/A_con.pkl", "rb") as f:
        self.A = pickle.load(f)
    with open("result/G_con.pkl", "rb") as f:
        self.G = pickle.load(f)
def selectFeatures(X, t=0):
    if t == 0:
        selector = joblib.load('selector.pkl')
    else:
        selector = joblib.load('SelectKBest.pkl')
    X_new = selector.transform(X)
    return X_new
def predict(filein_name):
    """Predict."""
    filein_name = '0908-12.txt'
    # get models
    from sklearn.externals import joblib
    LR010 = joblib.load('0903_uid_ave_010.pkl')
    LR001 = joblib.load('0903_uid_ave_001.pkl')
    LR100 = joblib.load('0903_uid_ave_100.pkl')
    import scipy.io as sio
    X = sio.loadmat('uid_dict_X001-12.mat')['X']
    y_predict_prob = LR001.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y001.mat', {'y': y_predict_prob})
    X = sio.loadmat('uid_dict_X010-12.mat')['X']
    y_predict_prob = LR010.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y010.mat', {'y': y_predict_prob})
    X = sio.loadmat('uid_dict_X100-12.mat')['X']
    y_predict_prob = LR100.predict_proba(X)
    print(y_predict_prob.shape)
    sio.savemat(filein_name[:-4] + 'y100.mat', {'y': y_predict_prob})
def train_pipeline(kind, cut, vectorizer, model_trainer, do_cut=False, do_vectorizer=False, record_num=None):
    print('reading...')
    alltext, accu_label, law_label, time_label = data.read_trainData("./data/data_train.json", record_num)
    if do_cut:
        print('cutting...')
        train_text = cut.cut(alltext)
        joblib.dump(train_text, './data/{}_cut_train.txt'.format(cut.name))
        print('cleaning...')
        cleaner = Cleaner()
        cleaned_train_text = cleaner.clean(train_text)
        joblib.dump(cleaned_train_text, './data/{}_cut_train_cleaned.txt'.format(cut.name))
    else:
        print('load existing cut file {}...'.format('./data/{}_cut_train_cleaned.txt'.format(cut.name)))
        cleaned_train_text = joblib.load('./data/{}_cut_train_cleaned.txt'.format(cut.name))
    vectorizer_name = '{}_{}'.format(cut.name, vectorizer.name)
    if do_vectorizer:
        print('{} training...'.format(vectorizer_name))
        vectorizer = vectorizer.train(cleaned_train_text)
        joblib.dump(vectorizer, './model/{}/predictor/model/{}_vectorizer.model'.format(model_trainer.name, vectorizer_name))
        print('{} vectorizing...'.format(vectorizer))
        vec = vectorizer.transform(cleaned_train_text)
        joblib.dump(vec, './data/vec_{}.txt'.format(vectorizer_name))
    else:
        print('load existing vec file {}...'.format('./data/vec_{}.txt'.format(vectorizer_name)))
        vec = joblib.load('./data/vec_{}.txt'.format(vectorizer_name))
    print('{} training...'.format(kind))
    model = model_trainer.train(vec, accu_label)
    joblib.dump(model, './model/{}/predictor/model/{}_{}.model'.format(model_trainer.name, vectorizer_name, kind))
def CV_trainModel():
    # data preprocessing
    label_has = joblib.load('data/label_has.pkl')
    traindata_has = joblib.load('data/data_has.pkl')
    label_no = joblib.load('data/label_no.pkl')
    traindata_no = joblib.load('data/data_no.pkl')
    traindata = np.vstack([traindata_has, traindata_no])
    labels = np.hstack([label_has, label_no])
    # print traindata.dtype
    # print labels.dtype
    traindata = np.float32(traindata)
    labels = np.int32(labels)
    model = SVM(C=1.0, gamma=1.0)
    model.train(traindata, labels)
    model.save('model/svm.dat')
    # model.load('model/svm.dat')
    return model
def train_classifier():
    pos_feat_path = positive_features_path
    neg_feat_path = negative_features_path
    model_path = classifier_model_path

    feature_vectors = []
    labels = []

    for feat_path in glob.glob(os.path.join(pos_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(1)

    for feat_path in glob.glob(os.path.join(neg_feat_path, "*.feat")):
        fd = joblib.load(feat_path)
        print len(fd)
        if len(fd):
            fd = fd.astype(numpy.object)
            feature_vectors.append(fd)
            labels.append(0)

    classifier = LinearSVC()
    print "Training classifier"
    classifier.fit(feature_vectors, labels)
    print "Classifier successfully trained"
    if not os.path.isdir(os.path.split(model_path)[0]):
        os.makedirs(os.path.split(model_path)[0])
    joblib.dump(classifier, model_path)
def init_api(app, es_util):
    # load the pickled model, vectorizer and label encoder
    model = joblib.load(r'app\rules\modelspkl\knowledge_cart.pkl')
    tf_transformer = joblib.load(r'app\rules\modelspkl\knowledge_tf_transformer.pkl')
    le = joblib.load(r'app\rules\modelspkl\knowledge_labelencoder.pkl')

    @app.route('/ml_error_predict_batch', methods=['POST'])
    def error_predict():
        parmStr = request.get_data()
        paramDict = json.loads(parmStr)
        testline = paramDict['index']
        """
        Get the user info
        :return: json
        """
        auth = Auth()
        result = auth.identify(request)
        if result['status']:
            test_datas = readDataLine(testline)
            if test_datas != None:
                test_feature_datas = tf_transformer.transform(test_datas)
                # run the prediction
                pred = model.predict(test_feature_datas)
                result = le.inverse_transform(pred)
                print('Prediction result: %s, input: %s' % (result[0], testline))
                # logging.debug('Prediction result: %s, input: %s' % (result[0], testline))
                return result[0]
            return "this is None"
        else:
            return jsonify(result)
def trainModel():
    # data preprocessing
    data_train = joblib.load('data/data_train.pkl')
    label_train = joblib.load('data/label_train.pkl')
    print data_train.shape
    clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.1,
                  degree=0.1, gamma=1.0, kernel='rbf', max_iter=-1,
                  probability=False, random_state=None, shrinking=True,
                  tol=0.001, verbose=True)
    # clf.set_params(kernel='rbf')
    print clf
    print data_train.shape
    print label_train.shape
    print 'begin training....'
    clf.fit(data_train, label_train)
    print 'finish training....'
    print clf
    joblib.dump(clf, 'model/svm.pkl')
    return None
def varianceInProductGroups(df):
    nonlocal state
    print("Making: varianceInProductGroups")
    if state == 1 and os.path.exists('pickleFiles/colorStd.pkl') and os.path.exists('pickleFiles/sizeStd.pkl'):
        sizeStd = joblib.load('pickleFiles/sizeStd.pkl')
        colorStd = joblib.load('pickleFiles/colorStd.pkl')
    elif state == 0 and os.path.exists('pickleFiles/colorStd_test.pkl') and os.path.exists('pickleFiles/sizeStd_test.pkl'):
        sizeStd = joblib.load('pickleFiles/sizeStd_test.pkl')
        colorStd = joblib.load('pickleFiles/colorStd_test.pkl')
    else:
        products = df.groupby('productGroup')
        sizeStd, colorStd = {}, {}
        for idx, product in products:
            if idx not in sizeStd or idx not in colorStd:
                size = np.std(list(Counter(product['sizeCode']).values()))
                color = np.std(list(Counter(product['colorCode']).values()))
                sizeStd[idx] = size
                colorStd[idx] = color
        if state == 1:
            joblib.dump(sizeStd, 'pickleFiles/sizeStd.pkl')
            joblib.dump(colorStd, 'pickleFiles/colorStd.pkl')
        else:
            joblib.dump(sizeStd, 'pickleFiles/sizeStd_test.pkl')
            joblib.dump(colorStd, 'pickleFiles/colorStd_test.pkl')
    df['sizeStd'] = df['productGroup'].map(sizeStd)
    df['colorStd'] = df['productGroup'].map(colorStd)
    return df
def event2semsim(event):
    import os
    from sklearn.externals import joblib
    if isinstance(event, str):
        etype = event
    else:
        etype = event.type
    if etype == "accident":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "accidents.norm-stem.lam20.000.pkl"))
    elif etype == "earthquake" or etype == "storm" or etype == "impact event":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "natural-disasters.norm-stem.lam20.000.pkl"))
    elif etype == "protest" or etype == "riot":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "social-unrest.norm-stem.lam1.000.pkl"))
    elif etype == "shooting" or etype == "bombing" or etype == "conflict" or \
            etype == "hostage":
        return joblib.load(os.path.join(
            os.getenv("TREC_DATA"), "semsim", "terrorism.norm-stem.lam10.000.pkl"))
def averageColor(df):
    nonlocal state
    print("Making: averageColor")
    if state == 1 and os.path.exists('pickleFiles/averageColor.pkl'):
        averageColor = joblib.load('pickleFiles/averageColor.pkl')
    elif state == 0 and os.path.exists('pickleFiles/averageColor_test.pkl'):
        averageColor = joblib.load('pickleFiles/averageColor_test.pkl')
    else:
        allColor = {}
        # find all the colours that customers buy
        for i in df.index:
            currCustomer = df['customerID'][i]
            if currCustomer not in allColor:
                allColor[currCustomer] = [df['colorCode'][i]]
            else:
                allColor[currCustomer].append(df['colorCode'][i])
        averageColor = {}
        for entry in allColor:
            if entry not in averageColor:
                averageColor[entry] = np.mean(allColor[entry])
        if state == 1:
            joblib.dump(averageColor, 'pickleFiles/averageColor.pkl')
        else:
            joblib.dump(averageColor, 'pickleFiles/averageColor_test.pkl')
    avgcolor = pd.Series(name='averageColor', index=df.index)
    for i in df.index:
        customer = df['customerID'][i]
        avgcolor.set_value(i, averageColor[customer])
    df['averageColor'] = avgcolor
    return df
def cheapskateItems(df):
    nonlocal state
    print("Making: cheapskateItems")
    if state == 1 and os.path.exists('pickleFiles/voucherToArticle.pkl'):
        voucherDic = joblib.load('pickleFiles/voucherToArticle.pkl')
    elif state == 0 and os.path.exists('pickleFiles/voucherToArticle_test.pkl'):
        voucherDic = joblib.load('pickleFiles/voucherToArticle_test.pkl')
    else:
        voucherDic = {}
        vouchers = df.groupby('voucherID')
        for idx, voucher in vouchers:
            if idx not in voucherDic:
                voucherDic[idx] = Counter(voucher['articleID']).most_common()[0][0]
        if state == 1:
            joblib.dump(voucherDic, 'pickleFiles/voucherToArticle.pkl')
        else:
            joblib.dump(voucherDic, 'pickleFiles/voucherToArticle_test.pkl')
    articleSet = set(voucherDic.values())
    cheapArticle = pd.Series(name='cheapArticle', index=df.index)
    for i in df.index:
        article = df['articleID'][i]
        isCheap = 1 if article in articleSet else 0
        cheapArticle.set_value(i, isCheap)
    df['cheapArticle'] = cheapArticle
    return df
def colorPopularity(df):
    print('Making: colorPopularity')
    nonlocal state
    if state == True and os.path.exists('pickleFiles/colorMap.pkl'):
        colorMap = joblib.load('pickleFiles/colorMap.pkl')
    elif state == False and os.path.exists('pickleFiles/colorMap_test.pkl'):
        colorMap = joblib.load('pickleFiles/colorMap_test.pkl')
    else:
        colorCount = Counter(df['colorCode'])
        popularColors = [i[0] for i in colorCount.most_common(5)]
        shittyColors = [j[0] for j in colorCount.most_common()[::-1] if j[1] < 5]
        colorMap = {}
        for color in df['colorCode']:
            if color not in colorMap:
                if color in popularColors:
                    colorMap[color] = "popular"
                elif color in shittyColors:
                    colorMap[color] = "unpopular"
                else:
                    colorMap[color] = "neutral"
        if state == True:
            joblib.dump(colorMap, 'pickleFiles/colorMap.pkl')
        else:
            joblib.dump(colorMap, 'pickleFiles/colorMap_test.pkl')
    df['colorPopularity'] = df['colorCode'].map(colorMap)
    return df
def modeSize(df):
    nonlocal state
    print('Making: mostFrequentSize and differenceSize')
    if state == 1 and os.path.exists('pickleFiles/modeSizesBought.pkl'):
        modeSizeData = joblib.load('pickleFiles/modeSizesBought.pkl')
    elif state == 0 and os.path.exists('pickleFiles/modeSizesBought_test.pkl'):
        modeSizeData = joblib.load('pickleFiles/modeSizesBought_test.pkl')
    else:
        allSize = {}
        for i in df.index:
            # find all sizes purchased by customers
            currCust = df['customerID'][i]
            if currCust not in allSize:
                allSize[currCust] = [df['sizeCode'][i]]
            else:
                allSize[currCust].append(df['sizeCode'][i])
        modeSize = {}
        for customer in allSize:
            if customer not in modeSize:
                mode = Counter(allSize[customer]).most_common(1)[0][0]
                modeSize[customer] = mode
        if state == 1:
            joblib.dump(modeSize, 'pickleFiles/modeSizesBought.pkl')
        else:
            joblib.dump(modeSize, 'pickleFiles/modeSizesBought_test.pkl')
        modeSizeData = modeSize
    mostFrequentSize = pd.Series(name='mostFrequentSize', index=df.index)
    for i in df.index:
        customer = df['customerID'][i]
        mostFrequentSize.set_value(i, modeSizeData[customer])
    df['modeSize'] = mostFrequentSize
    df['differenceModeSize'] = abs(mostFrequentSize - df['sizeCode'])
    return df
def callback(ch, method, properties, body):
    print(" [x] Received %r" % (body,))
    answer = dict()
    message = str(body)
    features = calculate_features(message[2:-2])
    features = np.array(features).reshape(1, -1)
    # scaler = StandardScaler()
    # features = scaler.fit_transform(features)
    file = 'Resources/AGE_model.pkl'
    age_model = joblib.load(file)
    file = 'Resources/GENDER_model.pkl'
    sex_model = joblib.load(file)
    answer['age'] = [age_model.predict(features)]
    age = answer['age'][0][0:1][0]
    age = str(age).replace("\r", "")
    age = str(age).replace("\n", "")
    answer['gender'] = [sex_model.predict(features)]
    gender = answer['gender'][0][0:1][0]
    print('Age: ' + str(age) + ', gender: ' + str(gender))
    print(" [x] Done")
    ch.basic_ack(delivery_tag=method.delivery_tag)
def predict_category_subcategory(book_name):
    data_set1 = pandas.Series(book_name.encode('ascii'))
    # Data Preprocessing
    data_set1 = data_set1.dropna(axis=0, how='any')
    data_set1 = data_set1.str.lower()
    # Manual removal List
    remove_list = ['edition', 'ed', 'edn', 'vol', 'vol.', '-', 'i']
    data_set1[0] = ' '.join([i for i in data_set1[0].split() if i not in remove_list])
    data_set1 = data_set1.apply(lambda x: re.sub(r'\w*\d\w*', '', x).strip())
    data_set1 = data_set1.apply(lambda x: re.sub(r'\([^)]*\)', ' ', x))
    data_set1 = data_set1.apply(lambda x: re.sub('[^A-Za-z0-9]+', ' ', x))
    # data_set['Category ID'] = data_set['Category ID']+"|"+data_set['Subcategory ID']
    # Stemming the book titles
    stemmer = LancasterStemmer()
    data_set1[0] = " ".join([stemmer.stem(i) for i in data_set1[0].split()])
    clf = joblib.load(os.path.join(BASE_DIR + "/learners/", 'category_predict.pkl'))
    ans = clf.predict(data_set1)
    sub_clf = joblib.load(os.path.join(BASE_DIR + "/learners/", 'subcategory_predict.pkl'))
    sub_ans = sub_clf.predict(data_set1)
    return [ans[0], sub_ans[0]]
def put(self):
    startgeocode_json = (request.form['startgeocode'])
    endgeocode_json = (request.form['endgeocode'])
    tripdistance_json = (request.form['tripdistance'])
    import json
    startgeocode = json.loads(startgeocode_json)
    endgeocode = json.loads(endgeocode_json)
    startlat = float(startgeocode['lat'])
    startlng = float(startgeocode['lng'])
    endlat = float(endgeocode['lat'])
    endlng = float(endgeocode['lng'])
    tripdist = float(tripdistance_json.split(" ")[0])
    hour = int(request.form['hour'])
    dayofweek = int(request.form['dayofweek'])
    lowspeedclf = joblib.load(os.path.join(APP_STATIC, 'costtime.pkl'))
    lowspeedx = [startlat, startlng, endlat, endlng, hour, dayofweek, tripdist]
    lowspeedy = int(lowspeedclf.predict(lowspeedx)[0])
    tripdurationclf = joblib.load(os.path.join(APP_STATIC, 'trip_duration.pkl'))
    tripduration_x = lowspeedx
    trip_duration_y = tripdurationclf.predict(tripduration_x)[0]
    duration_list = []
    lowspeed_list = []
    for i in range(24):
        x = [startlat, startlng, endlat, endlng, i, dayofweek, tripdist]
        duration_list.append([i, int(tripdurationclf.predict(x)[0])])
        lowspeed_list.append([i, int(lowspeedclf.predict(x)[0])])
    return {"lowspeedtime": lowspeedy,
            "tripduration": trip_duration_y,
            "duration_list": duration_list,
            "lowspeed_list": lowspeed_list}
def train_and_single_label(train_filename, test_filename, clf, pickled):
    """ Only return one example ID for each q_id """
    if pickled:
        train_data = joblib.load(train_filename)
        test_data = joblib.load(test_filename)
    else:
        train_data = extract_ibm_data(train_filename)
        test_data = extract_ibm_data(test_filename, test_file=True)
    X = train_data["data"]
    y = train_data["target"]
    clf.fit(X, y)
    labels = clf.predict(test_data["data"])
    # now manipulate the results using test_data['q_id'] to filter the labels
    ## NEW CODE:
    used_qids = []
    results = []
    for i in range(len(labels)):
        if labels[i] == "true":
            if not test_data["q_id"][i] in used_qids:
                results.append(test_data["id"][i])
                used_qids.append(test_data["q_id"][i])
    return results
def loadModule(mode):
    global movieReviewer
    try:
        movieReviewer = joblib.load("./SVM/movieReviewer%s.svm" % mode)
    except:
        import SVMTrain
        movieReviewer = joblib.load("./SVM/movieReviewer%s.svm" % mode)
def __init__(self):
    if ("model.pkl" in os.listdir()) and ("enc.pkl" in os.listdir()):
        self.model = joblib.load("model.pkl")
        self.enc = joblib.load("enc.pkl")
    else:
        self.refit_from_scratch()
def roc_precision_final(db, fac=1):
    if os.path.exists(MAT_PATH) == False:
        os.mkdir(MAT_PATH)
    random_state = check_random_state(0)
    print("Loading {}...".format(db))
    clf = joblib.load("clfs/" + db)
    classes = clf.classes_
    print("Loading test set...")
    loaded = joblib.load("testSet/" + db)
    y_true = loaded[:, -1]
    print("Predict proba...")
    y_score = clf.predict_proba(loaded[:, 0:-1])
    loaded = 0
    clf = 0
    y_score = y_score[:, classes == 1] * fac
    print("ROC...")
    if fac != 1:
        db = db + str(fac)
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    sio.savemat(MAT_PATH + 'final.roc.' + db + '.mat',
                {'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
    print("Precision/Recall...")
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    sio.savemat(MAT_PATH + 'final.precall.' + db + '.mat',
                {'precision': precision, 'recall': recall, 'thresholds': thresholds})
def load_models(path="models", models={}):
    x = os.listdir(path)
    models = models
    for i in x:
        try:
            if not i.startswith('.') and not i.startswith('_') and os.path.isdir(os.path.join(path, i)):
                way = os.path.join(path, i)
                clf = glob.glob(os.path.join(way, "clf_*.pkl"))
                vec = glob.glob(os.path.join(way, "vectorizer_*.pkl"))
                print(". %s" % (way))
                if len(clf) != 1 or len(vec) != 1:
                    print("└── No model found in '%s'. Skipped." % (i))
                    continue
                t0 = time()
                sys.stdout.flush()
                print("├── Loading classifier '%s'..." % (i))
                sys.stdout.flush()
                if "clf_%s" % (i) not in models:
                    models["clf_%s" % (i)] = joblib.load(clf[0])
                print("├── Done. [%.02fs]" % (time() - t0))
                sys.stdout.flush()
                t0 = time()
                print("├── Loading vectorizer '%s'..." % (i))
                sys.stdout.flush()
                if "vectorizer_%s" % (i) not in models:
                    models["vectorizer_%s" % (i)] = joblib.load(vec[0])
                print("└── Done. [%.02fs]" % (time() - t0))
                sys.stdout.flush()
                t0 = time()
        except:
            print(">> Error on '%s', skipped." % (i))
    return models
def getClassifiers(self):
    if not os.path.exists(self.outDir):
        os.mkdir(self.outDir)
    outDir = self.outDir + os.sep + "classPickle"
    if not os.path.exists(outDir):
        os.mkdir(outDir)
    class1Save = outDir + os.sep + "classifier1.pkl"
    class2Save = outDir + os.sep + "classifier2.pkl"
    class1Exists = os.path.exists(class1Save)
    class2Exists = os.path.exists(class2Save)
    if not (class1Exists and class2Exists):
        self._setupTempDir()
        self.fitsFiles = [f[:-5] for f in os.listdir(self.fitsFolder) if ".fits" in f]
        self.fitsFilesLoc = [os.path.abspath(self.fitsFolder + os.sep + f)
                             for f in os.listdir(self.fitsFolder) if ".fits" in f]
        for f in self.fitsFiles:
            self.mainCatalog[f] = self.getCatalog(self.fitsFolder + os.sep + f + ".fits", ishape=True)
            self.candidateMask[f] = self._getCandidateMask(self.mainCatalog[f],
                                                           np.loadtxt(self.fitsFolder + os.sep + f + ".txt"))
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'WEIGHT', self.candidateMask[f] * 1.0, usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'EXTENDED', self.candidateMask[f], usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'HLR', np.zeros(self.mainCatalog[f].shape), usemask=False)
            self.mainCatalog[f] = append_fields(self.mainCatalog[f], 'MAG', np.zeros(self.mainCatalog[f].shape), usemask=False)
        self._trainClassifier()
        joblib.dump(self.sc, class1Save)
        joblib.dump(self.sc2, class2Save)
    else:
        self.sc = joblib.load(class1Save)
        self.sc2 = joblib.load(class2Save)
    # self._testClassifier(catalog, candidateMask)
    # self._cleanTempDir()
    self._debug("Classifier generated. Now you can invoke .clasify(catalog)")
def _train(self, train_data, resources):
    sample_length = len(train_data)
    dict_status_path = os.path.join(root_dic,
                                    'dict_vectorizer_{}.status'.format(sample_length))
    if os.path.isfile(dict_status_path):
        dictVectorizer = joblib.load(dict_status_path)
    else:
        dictVectorizer = DictVectorizer()
        dictVectorizer.fit(train_data[self.features].fillna(0).to_dict('record'))
        joblib.dump(dictVectorizer, dict_status_path)
    tfidf_status_path = os.path.join(root_dic,
                                     'tfidf_vectorizer_{}.status'.format(sample_length))
    if os.path.isfile(tfidf_status_path):
        tfidf = joblib.load(tfidf_status_path)
    else:
        tfidf = TfidfVectorizer(min_df=40, max_features=300)
        tfidf.fit(train_data.essay)
        joblib.dump(tfidf, tfidf_status_path)
    resources['dictVectorizer'] = dictVectorizer
    resources['tfidf'] = tfidf
    print 'Head Processing Completed'
    return train_data, resources
def load_model(model_path):
    gen_params_values = joblib.load(model_path + '_gen_params.jl')
    for p, v in izip(gen_params, gen_params_values):
        p.set_value(v)
    discrim_params_values = joblib.load(model_path + '_discrim_params.jl')
    for p, v in izip(discrim_params, discrim_params_values):
        p.set_value(v)
def do_gbdt(train_x, train_y, test_x=None, test_y=None, learning_rate=0.03,
            max_depth=8, max_features=25, n_estimators=600,
            load=False, save=True, outfile=None, search=False):
    if search == False:
        mdl_name = 'gbdt_train_lr' + str(learning_rate) + '_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '.pkl'
        if os.path.exists(mdl_name) == True:
            clf_gbdt = joblib.load(mdl_name)
        else:
            # create gradient boosting
            clf_gbdt = GradientBoostingClassifier(learning_rate=learning_rate,
                                                  max_depth=max_depth,
                                                  max_features=max_features,
                                                  n_estimators=n_estimators)
            # n_estimators=500, learning_rate=0.5, max_depth=3)
            clf_gbdt.fit(train_x, train_y)
            if save == True:
                try:
                    _ = joblib.dump(clf_gbdt, mdl_name, compress=1)
                except:
                    print("*** Save GBM model to pickle failed!!!")
                    if outfile != None:
                        outfile.write("*** Save RF model to pickle failed!!!")
        if test_x != None and test_y != None:
            probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
            score_gbdt = roc_auc_score(test_y, probas_gbdt)
            print("GBDT ROC score", score_gbdt)
        return clf_gbdt
    else:
        max_depth_list = [5, 6, 7]
        n_list = [2000, 3000]
        lr_list = [0.01, 0.005]
        info = {}
        for md in max_depth_list:
            for n in n_list:
                for lr in lr_list:
                    print 'max_depth = ', md
                    print 'n = ', n
                    print 'learning rate = ', lr
                    clf_gbdt = GradientBoostingClassifier(learning_rate=lr,
                                                          max_depth=md,
                                                          max_features=max_features,
                                                          n_estimators=n)
                    # n_estimators=500, learning_rate=0.5, max_depth=3)
                    clf_gbdt.fit(train_x, train_y)
                    probas_gbdt = clf_gbdt.predict_proba(test_x)[:, 1]
                    score_gbdt = roc_auc_score(test_y, probas_gbdt)
                    info[md, n, lr] = score_gbdt
        for md in info:
            scores = info[md]
            print('GBDT max_depth = %d, n = %d, lr = %.5f, ROC score = %.5f(%.5f)'
                  % (md[0], md[1], md[2], scores.mean(), scores.std()))
source_wav_file = sys.argv[1]
gmm_file = sys.argv[2]
converted_wav_file = sys.argv[3]

# Extract the mel-cepstrum and pitch from the source wav file
# Save in ASCII format so numpy can read it easily
print "extract mcep ..."
source_mcep_file = "source.mcep_ascii"
extract_mcep(source_wav_file, source_mcep_file, ascii=True)

print "extract pitch ..."
source_pitch_file = "source.pitch"
extract_pitch(source_wav_file, source_pitch_file)

# Load the GMM
gmm = joblib.load(gmm_file)

# Convert the source mel-cepstrum with the GMM
# Save in binary format so SPTK can synthesize from it
print "convert mcep ..."
converted_mcep_file = "converted.mcep"
convert_mcep(source_mcep_file, converted_mcep_file, gmm)

# Re-synthesize from the source pitch and the converted mel-cepstrum
print "synthesis ..."
synthesis(source_pitch_file, converted_mcep_file, converted_wav_file)

# Remove the temporary files
os.remove(source_mcep_file)
os.remove(source_pitch_file)
os.remove(converted_mcep_file)
def load_model(model):
    return joblib.load(model)
            raise
        # handle Index exceptions
        except IndexError:
            counter = counter + 1
            # print("counts : ", counter)
            # print("error handled")

    # save all the train and test vectors
    joblib.dump(train_data, 'train_data.pkl')
    joblib.dump(test_data, 'test_data.pkl')
    joblib.dump(train_labels, 'train_labels.pkl')
    joblib.dump(test_labels, 'test_labels.pkl')
else:
    # run only if create_vectors_again is unset
    # load all the train and test vectors
    train_data = joblib.load('train_data.pkl')
    test_data = joblib.load('test_data.pkl')
    train_labels = joblib.load('train_labels.pkl')
    test_labels = joblib.load('test_labels.pkl')

# fit the model again if start_training_model is set
if R_SEG.start_training_model == 1:
    clf = R_SEG.trainClassifier(R_SEG.select_classifier, train_data, train_labels)
else:
    # load the fitted model if start_training_model is unset
    clf = joblib.load(R_SEG.select_classifier + '_model.pkl')

# find all the predictions on all test images
predictions = R_SEG.predictImages(clf, test_data, test_labels)

# save all the images
R_SEG.saveSegmentedImage(num_test, image_filenames_test, predictions)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
from random import randint
from matplotlib.lines import Line2D
from sklearn.cluster import KMeans
from scipy.spatial import distance
from sklearn.externals import joblib

clf = joblib.load('Mood_disorder_45')
clf.predict()
'''
data = pd.read_csv('GTEx_v7_brain_subGEM-log-no.txt', sep='\t')
data = data.transpose()
print(data.shape)
data = data.fillna(0)
global_max = data.max().max()
global_min = data.min().min()
print(global_max)
'''
centroid = clf.cluster_centers_
num_clusters = 45
fig = plt.figure()
for i in range(0, num_clusters):
if __name__ == "__main__":
    import argparse
    import sys

    # Initialize parser
    parser = argparse.ArgumentParser()

    # Adding optional argument
    parser.add_argument("-file", "--File", help="Input Test File Path")
    parser.add_argument("-model", "--Model", help="Input Model Path")

    # Read arguments from command line
    args = parser.parse_args()

    if args.File:
        final_test, X = create_test_data(file_path=str(args.File))
    if args.Model:
        model = joblib.load(str(args.Model))
        y_pred_test = model.predict(X)
        classes = {0: 'SAD', 1: 'HAPPY'}
        final_test['MOOD_TAG'] = y_pred_test
        final_test['MOOD_TAG'] = [classes[item] for item in final_test['MOOD_TAG']]
        file_name = "evaluation_classified.csv"
        final_test.to_csv(file_name, index=None, header=True)
files_tr = sorted(glob('../data/train_f*.f'))

# USE_PREF
li = []
for i in files_tr:
    for j in USE_PREF:
        if j in i:
            li.append(i)
            break
files_tr = li

[print(i, f) for i, f in enumerate(files_tr)]

X_train = pd.concat(
    [pd.read_feather(f) for f in tqdm(files_tr, mininterval=30)] +
    [joblib.load('../external/X_train_nejumi.pkl.gz')],
    axis=1)
y_train = utils.load_target()['HasDetections']

# drop
if len(col_drop) > 0:
    X_train.drop(col_drop, axis=1, inplace=True)

if X_train.columns.duplicated().sum() > 0:
    raise Exception(
        f'duplicated!: { X_train.columns[X_train.columns.duplicated()] }')
print('no dup :) ')
print(f'X_train.shape {X_train.shape}')

gc.collect()
def load_model(serialized_model):
    model = joblib.load(serialized_model)
    logging.info("Model loaded from %s", serialized_model)
    return model
# get data from database
def get_data():
    sql_con = MySQLdb.connect(
        host='127.0.0.1',
        port=3306,
        user='******',
        passwd='XXXXXX',
        db='Hackthon2019',
        use_unicode=True,
        charset="utf8"
    )
    sql_cur = sql_con.cursor()
    sql_cur.execute("SELECT using FROM training_data")
    using = sql_cur.fetchall()
    sql_cur.execute("SELECT pnexts FROM training_data")
    pre_nexts = sql_cur.fetchall()
    return (using, pre_nexts)


# train model
training()
model = joblib.load(model_name)
now_using = "photoshop"
result = model.predict(now_using)

# send message to windows computer
notify = Notify()
notify.register()
notify.send("Also open " + str(result) + "?")  # should be picture
def do_RF(train_x, train_y, test_x=None, test_y=None, n_estimators=2000,
          max_depth=20, max_features=20, criterion='entropy', method='isotonic',
          cv=5, min_samples_leaf=1, min_samples_split=13, random_state=4141,
          n_jobs=-1, load=False, save=True, outfile=None, search=False):
    if search == False:
        # mdl_name = 'rf_train_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '_maxfeat' + str(max_features) \
        mdl_name = 'rf_isotonic_train_n' + str(n_estimators) + '_maxdep' + str(max_depth) + '_maxfeat' + str(max_features) \
            + '_minSamLeaf' + str(min_samples_leaf) + '_minSamSplit' + str(min_samples_split) + '.pkl'
        if os.path.exists(mdl_name) == True:
            clf_rf_isotonic = joblib.load(mdl_name)
        else:
            clf_rf = RandomForestClassifier(n_estimators=n_estimators,
                                            max_depth=max_depth,
                                            max_features=max_features,
                                            criterion=criterion,
                                            min_samples_leaf=min_samples_leaf,
                                            min_samples_split=min_samples_split,
                                            random_state=random_state,
                                            n_jobs=n_jobs)
            clf_rf_isotonic = CalibratedClassifierCV(clf_rf, cv=cv, method=method)
            clf_rf_isotonic.fit(train_x, train_y)
            if save == True:
                try:
                    _ = joblib.dump(clf_rf_isotonic, mdl_name, compress=1)
                except:
                    print("*** Save RF model to pickle failed!!!")
                    if outfile != None:
                        outfile.write("*** Save RF model to pickle failed!!!")
        if test_x != None and test_y != None:
            probas_rf = clf_rf_isotonic.predict_proba(test_x)[:, 1]
            score_rf = roc_auc_score(test_y, probas_rf)
            print("RF ROC score", score_rf)
        return clf_rf_isotonic
    else:
        if test_x == None or test_y == None:
            print "Have to provide test_x and test_y to do grid search!"
            return -1
        min_samples_split = [10, 11, 12]
        max_depth_list = [15, 20, 25]
        n_list = [2000]
        max_feat_list = [10, 20, 30]
        info = {}
        for mss in min_samples_split:
            for max_depth in max_depth_list:
                # for n in n_list:
                for max_features in max_feat_list:
                    print 'max_features = ', max_features
                    n = 2000
                    print 'n = ', n
                    print 'min_samples_split = ', mss
                    print 'max_depth = ', max_depth
                    clf_rf = RandomForestClassifier(n_estimators=n,
                                                    max_depth=max_depth,
                                                    max_features=max_features,
                                                    criterion=criterion,
                                                    min_samples_leaf=min_samples_leaf,
                                                    min_samples_split=mss,
                                                    random_state=random_state,
                                                    n_jobs=n_jobs)
                    # clf_rf.fit(train_x, train_y)
                    clf_rf_isotonic = CalibratedClassifierCV(clf_rf, cv=cv, method=method)
                    clf_rf_isotonic.fit(train_x, train_y)
                    probas_rf = clf_rf_isotonic.predict_proba(test_x)[:, 1]
                    scores = roc_auc_score(test_y, probas_rf)
                    info[max_features, mss, max_depth] = scores
        for mss in info:
            scores = info[mss]
            print('clf_rf_isotonic: max_features = %d, min_samples_split = %d, max_depth = %d, ROC score = %.5f(%.5f)'
                  % (mss[0], mss[1], mss[2], scores.mean(), scores.std()))

        "B": {
            "0": 396.9
        },
        "LSTAT": {
            "0": 4.98
        }

    result looks like:
    { "prediction": [ <val> ] }
    """
    # Logging the input payload
    json_payload = request.json
    LOG.info(f"JSON payload: \n{json_payload}")
    inference_payload = pd.DataFrame(json_payload)
    LOG.info(f"Inference payload DataFrame: \n{inference_payload}")
    # scale the input
    scaled_payload = scale(inference_payload)
    # get an output prediction from the pretrained model, clf
    prediction = list(clf.predict(scaled_payload))
    # TO DO: Log the output prediction value
    LOG.info(f"prediction: {prediction}")
    return jsonify({'prediction': prediction})


if __name__ == "__main__":
    # load pretrained model as clf
    clf = joblib.load("./model_data/boston_housing_prediction.joblib")
    app.run(host='0.0.0.0', port=80, debug=True)  # specify port=80
def load_model(filename):
    return joblib.load(filename)
                                         include_top=False)
# in_tensor = base_model.inputs[0]
out_tensor = base_model.outputs[0]
out_tensor = tf.keras.layers.GlobalAveragePooling2D()(out_tensor)

# Define the full model by the endpoints
model = tf.keras.models.Model(inputs=[in_tensor], outputs=[out_tensor])

# Compile the model for execution. Losses and optimizers can be
# anything here, since we don't train the model
model.compile(loss="categorical_crossentropy", optimizer='sgd')

LDA = joblib.load('trained_LDA')

with open("submission_LDA.csv", "w") as fp:
    fp.write("Id,Category\n")

    # Image index
    i = 0

    # 1. load image and resize
    for file in os.listdir("test\\testset"):
        if file.endswith(".jpg"):
            # Load the image
            img = plt.imread("test\\testset\\" + file)

            # Resize it to the net input size:
            img = cv2.resize(img, (224, 224))
import keyboard
import collections
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import svm
from sklearn.externals import joblib
import time
from scipy.ndimage.filters import gaussian_filter
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# svm_model_linear = joblib.load('filtered.pkl')
clf = joblib.load('Quad.pkl')
print("press cntrl to see real time data!")


class MyListener(myo.DeviceListener):

    def __init__(self, queue_size=8):
        self.lock = threading.Lock()
        self.emg_data_queue = collections.deque(maxlen=queue_size)
        self.gyro_data_queue = collections.deque(maxlen=3)
        self.ori_data_queue = collections.deque(maxlen=4)
        self.acc_data_queue = collections.deque(maxlen=3)

    def on_connect(self, device, timestamp, firmware_version):
        device.set_stream_emg(myo.StreamEmg.enabled)

    def on_emg_data(self, device, timestamp, emg_data):
# import numpy as np
# import matplotlib.pyplot as plt
# import pandas as pd
import re
from sklearn.externals import joblib
import pickle

# saved_classifier = joblib.load('saved_classifier.sav')
saved_classifier = joblib.load("class.pkl")
my_cv = joblib.load("my_cv.pkl")

'''import re
import nltk
nltk.download('stopwords')'''
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()

# dataset = pd.read_csv('train.csv')
# train = dataset.iloc[0:20001, 1:]
# y = dataset.iloc[0:len(train), 2:].values

sentences = []
'''for i in range(0, len(train)):
    sent = re.sub(r"i'm", "i am", train['comment_text'][i])
    sent = re.sub(r"he's", "he is", train['comment_text'][i])
    sent = re.sub(r"she's", "she is", train['comment_text'][i])
    sent = re.sub(r"that's", "that is", train['comment_text'][i])
    sent = re.sub(r"what's", "what is", train['comment_text'][i])
    sent = re.sub(r"where's", "where is", train['comment_text'][i])
    sent = re.sub(r"how's", "how is", train['comment_text'][i])
parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()
pprint(vars(arguments))

# read parameters from speaker yml
sconf1 = SpeakerYML(arguments.org_yml)
sconf2 = SpeakerYML(arguments.tar_yml)
pconf = PairYML(arguments.pair_yml)

# read GMM for mcep
mcepgmm = GMMConvertor(
    n_mix=pconf.GMM_mcep_n_mix,
    covtype=pconf.GMM_mcep_covtype,
    gmmmode=None,
)
param = joblib.load(arguments.gmm)
mcepgmm.open_from_param(param)

# construct FeatureExtractor class
feat1 = FeatureExtractor(
    analyzer=sconf1.analyzer,
    fs=sconf1.wav_fs,
    fftl=sconf1.wav_fftl,
    shiftms=sconf1.wav_shiftms,
    minf0=sconf1.f0_minf0,
    maxf0=sconf1.f0_maxf0,
)
feat2 = FeatureExtractor(
    analyzer=sconf2.analyzer,
    fs=sconf2.wav_fs,
    fftl=sconf2.wav_fftl,
lrelu = activations.LeakyRectify(leak=0.2)
sigmoid = activations.Sigmoid()

trX, vaX, teX, trY, vaY, teY = pastaBlackWhite()

vaX = floatX(vaX) / 127.5 - 1.
trX = floatX(trX) / 127.5 - 1.
teX = floatX(teX) / 127.5 - 1.

X = T.tensor4()

desc = 'cond_dcgan'
epoch = 5999
params = [
    sharedX(p)
    for p in joblib.load('models/%s/%d_discrim_params.jl' % (desc, epoch))
]
print desc.upper()
print 'epoch %d' % epoch


def mean_and_var(X):
    u = T.mean(X, axis=[0, 2, 3])
    s = T.mean(T.sqr(X - u.dimshuffle('x', 0, 'x', 'x')), axis=[0, 2, 3])
    return u, s


def bnorm_statistics(X, w, w2, g2, b2, w3, g3, b3, wy):
    h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
    h2 = dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))
def loadModel(modelPath):
    log("Start load model: ", modelPath)
    clf = joblib.load(modelPath)
    return clf
def reload(self, filename):
    self.logger.info("reload")
    self.clf = joblib.load(filename)
def predicao():
    ss = StandardScaler()
    ss = StandardScaler()

    # disable warning messages
    pd.options.mode.chained_assignment = None  # default='warn'

    # load the data used to build the model
    df = pd.read_csv('registro_candidatos.csv')

    # get features and corresponding outcomes
    feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade', 'MediaAvaliacao', 'ErrosPraticos']
    training_features = df[feature_names]
    outcome_name = ['Recomenda']
    outcome_labels = df[outcome_name]

    # list features by type
    numeric_feature_names = ['MediaAvaliacao', 'ErrosPraticos']
    categoricial_feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade']

    # fit the scaler on the numeric features
    ss.fit(training_features[numeric_feature_names])

    # scale numeric features now
    training_features[numeric_feature_names] = ss.transform(
        training_features[numeric_feature_names])
    training_features = pd.get_dummies(training_features, columns=categoricial_feature_names)
    # print(training_features)
    categorical_engineered_features = list(
        set(training_features.columns) - set(numeric_feature_names))

    feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade', 'MediaAvaliacao', 'ErrosPraticos']
    numeric_feature_names = ['MediaAvaliacao', 'ErrosPraticos']
    categoricial_feature_names = ['Nota', 'DinamicadeGrupo', 'Agressividade']

    model = joblib.load(r'Model/model.pickle')
    scaler = joblib.load(r'Scaler/scaler.pickle')

    ## new data for classification
    root.geometry("500x500")

    w = Label(root, text="Nome do Candidato")
    w.pack()
    nome2 = Entry(root)
    nome2.pack()
    nome2.delete(0, END)
    nome2.insert(0, "")

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Digite a Nota:\n(Excelente - Alta - Boa - Média - Ruim - Péssima)")
    w.pack()
    nota2 = Entry(root)
    nota2.pack()
    nota2.delete(0, END)
    nota2.insert(0, "")

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Tem dinâmica de Grupo? (Sim - Não)")
    w.pack()
    dinamica = Entry(root)
    dinamica.pack()
    dinamica.delete(0, END)
    dinamica.insert(0, "")

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Tem agressividade?\n(Sim - Não)")
    w.pack()
    agressivida = Entry(root)
    agressivida.pack()
    agressivida.delete(0, END)

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Média na avaliação:")
    w.pack()
    media = Entry(root)
    media.pack()
    media.delete(0, END)

    w = Label(root, text="")
    w.pack()
    w = Label(root, text="Erros em teste prático:")
    w.pack()
    erros = Entry(root)
    erros.pack()
    erros.delete(0, END)

    w = Label(root, text="")
    w.pack()

    def consul():
        nome = nome2.get()
        nota = nota2.get()
        dinamicadegrupo = dinamica.get()
        agressividade = agressivida.get()
        mediaavaliacao = int(media.get())
        errospraticos = int(erros.get())

        new_data = pd.DataFrame([{
            'Nome': nome,
            'Nota': nota,
            'DinamicadeGrupo': dinamicadegrupo,
            'Agressividade': agressividade,
            'MediaAvaliacao': int(mediaavaliacao),
            'ErrosPraticos': int(errospraticos)
        }])
        new_data = new_data[['Nome', 'Nota', 'DinamicadeGrupo', 'Agressividade',
                             'MediaAvaliacao', 'ErrosPraticos']]

        ## prepare the prediction on the new data
        prediction_features = new_data[feature_names]

        # scaling
        prediction_features[numeric_feature_names] = scaler.transform(
            prediction_features[numeric_feature_names])

        # categorical variables
        prediction_features = pd.get_dummies(prediction_features,
                                             columns=categoricial_feature_names)

        # add any missing categorical feature columns
        current_categorical_engineered_features = set(
            prediction_features.columns) - set(numeric_feature_names)
        missing_features = set(categorical_engineered_features) - current_categorical_engineered_features
        for feature in missing_features:
            # add zeros, since the feature is absent in these data samples
            prediction_features[feature] = [0] * len(prediction_features)

        # prediction using the previously trained model
        predictions = model.predict(prediction_features)

        # results
        new_data['Recomenda'] = predictions
        print(new_data)
        tkMessageBox.showinfo(
            "Consulta Concluída!",
            "O sistema recomenda este candidato:\n *** " +
            str(new_data['Recomenda'][0] + str("***\n")))

    busca = Button(root, text="Consultar", command=lambda: consul())
    busca.pack()
    root.mainloop()
#!C:\Users\sujan\AppData\Local\Programs\Python\Python37\python
print("Content-type: text/html\r\n\n")
# -*- coding: utf-8 -*-

# importing libraries
from sklearn.externals import joblib
import inputScript
import sys

# load the pickle file
classifier = joblib.load('rf_final.pkl')

# input url
url = sys.argv[1]

# checking and predicting
checkprediction = inputScript.main(url)
prediction = classifier.predict(checkprediction)

if prediction == 1:
    print(" THIS IS PHISHING URL")
else:
    print(" THIS IS NOT PHISHING URL")
for plot_num, folder in enumerate(to_consider):
    df = pd.read_csv("../../data/hvac/minutes_%s.csv" % folder)
    df["hvac_class_copy"] = df["hvac_class"].copy()
    df = df[df.dataid.isin(find_common_dataids())]
    df.index = range(len(df))
    if NUM_CLASSES == 2:
        df.hvac_class[(df.hvac_class == "Average") | (df.hvac_class == "Good")] = "Not bad"
        COLUMN_NAMES = ["Bad", "Not bad"]
    else:
        COLUMN_NAMES = ["Average", "Bad", "Good"]
    np.random.seed(0)
    clf = joblib.load(
        os.path.expanduser("~/git/nilm-actionable/data/hvac/rf_hvac.pkl"))
    true_labels = df['hvac_class'].values
    pred_labels = clf.predict(df[list(f)])
    numeric_cols = f
    df[numeric_cols] = df[numeric_cols].div(df[numeric_cols].max())
    accur = accuracy_multiclass(true_labels, pred_labels)
    print folder
    print accur
    print pd.value_counts(pred_labels)
    confusion_df = pd.DataFrame(confusion_matrix(true_labels, pred_labels),
                                index=["Feedback", "No Feedback"],
                                columns=["Feedback", "No Feedback"])
    sns.heatmap(confusion_df, annot=True, fmt="d", linewidths=.5, ax=ax)
    # ax.set_title(return_name(folder)[0])
    lemmatizer = WordNetLemmatizer()

    clean_tokens = []
    for tok in tokens:
        clean_tok = lemmatizer.lemmatize(tok).lower().strip()
        clean_tokens.append(clean_tok)

    return clean_tokens


# load data
engine = create_engine('sqlite:///../data/DisasterResponse.db')
df = pd.read_sql_table('DisasterRis', engine)

# load model
model = joblib.load("../models/classifier.pkl")


# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    # extract data needed for visuals
    # TODO: Below is an example - modify to extract data for your own visuals
    genre_counts = df.groupby('genre').count()['message']
    genre_names = list(genre_counts.index)

    # create visuals
    # TODO: Below is an example - modify to create your own visuals
    catg_nam = df.iloc[:, 4:].columns
def load_model(filename="./model_data.pkl"):
    model_data = joblib.load(filename)
    return model_data["classifier"], model_data["scaler"]
def load_model_and_scaler(from_dir):
    with open(f"{from_dir}/model.json", "r") as f:
        simpmodel = model_from_json(f.read())
    simpmodel.load_weights(f"{from_dir}/model.h5")
    scaler = joblib.load(f"{from_dir}/scaler.save")
    return simpmodel, scaler
def get_samples(foldername, filter=None):
    samples = []
    for file in os.listdir(foldername):
        if filter and file.find(filter) == -1:
            continue
        for sample in sample_file(foldername + '/' + file).get_samples():
            samples.append(sample)
    return samples


if __name__ == '__main__':
    arguments = docopt.docopt(__doc__)
    filters = {'dancing': 0, 'walking': 1, 'sitting': 2}
    if arguments['--model']:
        clf = joblib.load(arguments['--model'])
    else:
        training = dataset('../datasets/training', filters)
        svr = svm.SVC()
        exponential_range = [pow(10, i) for i in range(-4, 1)]
        parameters = {
            'kernel': ['linear', 'rbf'],
            'C': exponential_range,
            'gamma': exponential_range
        }
        clf = grid_search.GridSearchCV(svr, parameters, n_jobs=8, verbose=True)
        clf.fit(training.data, training.target)
        joblib.dump(clf, '../models/1s_6sps.pkl')
    print clf
    return preds


if __name__ == '__main__':
    '''
    call like
        python model_predict.py IBM 5DayWindowBestLongBuyPrice
        python model_predict.py GSPC 5DayWindowBestLongBuyPrice IBM
    '''
    ticker = sys.argv[1]
    target = sys.argv[2]
    target_ticker = sys.argv[3] if len(sys.argv) > 3 and sys.argv[3] != '>' else ticker

    # Target Feature Set
    test_features_file = config.GetTestingFeaturesFileName(ticker)
    predictions_file = config.GetPredictionsFileName(ticker, target, target_ticker)
    model_file = config.GetModelFileName(ticker, target, target_ticker)

    # reconstitute trained model
    print "Loading model..."
    model = joblib.load(model_file)

    print "Loading data..."
    test_features = numpy.load(test_features_file)

    print "Making predictions..."
    predictions = predict(model, test_features)

    print "Saving predictions to file..."
    numpy.save(predictions_file, predictions)
DB.create_tables([Prediction], safe=True)

# End database stuff
########################################

########################################
# Unpickle the previously-trained model

with open('columns.json') as fh:
    columns = json.load(fh)

pipeline = joblib.load('pipeline.pickle')

with open('dtypes.pickle', 'rb') as fh:
    dtypes = pickle.load(fh)

# End model un-pickling
########################################

########################################
# Begin webserver stuff

app = Flask(__name__)
            yield (x, y, img[y:y + windowsize[1], x:x + windowsize[0]])


def hogs(img):
    gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    bins = np.int32(bin_n * ang / (2 * np.pi))  # quantizing binvalues in (0...16)
    bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:]
    mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:]
    hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
    hist = np.hstack(hists)
    return hist  # hist is a 64 bit vector


path = os.getcwd()
Classifier = joblib.load('linear_2.pkl')

fi = path + "/svm/json/label_test.json"
with open(fi, 'r') as files:
    label_test = np.array(json.load(files))

fi = path + "/svm/json/feature_test.json"
with open(fi, 'r') as files:
    feature_test = np.array(json.load(files))

print "predicting.."
predict = Classifier.predict(feature_test)
print "Expected output:", label_test
print "Predicted output:", predict
print "Confusion Matrix:\n", metrics.confusion_matrix(label_test, predict)
print "Fowlkes Mallows Score", fowlkes_mallows_score(label_test, predict)
    spct += 1

print "Training Completed"

confusion_matrix = np.zeros((total_sp, total_sp))
tct = 0
for speaker in speakers:
    if tct <= 0:
        tct = len(glob.glob('test_wavdata/' + speaker + '/*.wav'))
    for testcasefile in glob.glob('test_wavdata/' + speaker + '/*.wav'):
        [Fs, x] = audioBasicIO.readAudioFile(testcasefile)
        features = extract_MFCCs(x, Fs, window * Fs, window_overlap * Fs,
                                 voiced_threshold_mul, voiced_threshold_range,
                                 calc_deltas)
        max_score = -9999999
        max_speaker = speaker
        for modelfile in sorted(glob.glob('train_models/*.pkl')):
            gmm = joblib.load(modelfile)
            score = gmm.score(features)
            if score > max_score:
                max_score, max_speaker = score, modelfile.replace('train_models/', '').replace('.pkl', '')
        print speaker + " -> " + max_speaker + (" Y" if speaker == max_speaker else " N")
        confusion_matrix[speakers[speaker]][speakers[max_speaker]] += 1

print "Accuracy: ", (sum([
    confusion_matrix[i][j] if i == j else 0
    for i in xrange(total_sp)
    for j in xrange(total_sp)
]) * 100) / float(tct * total_sp)