def __init__(self, normalize=False):
     """Store the ``normalize`` flag and build a normalizer when it is set.

     Args:
         normalize: When truthy, a ``normalizer()`` instance is created and
             kept on ``self.norm``. Otherwise ``self.norm`` is not assigned.
     """
     self.normalize = normalize
     # Plain truthiness test: comparing a flag with ``== True`` is
     # discouraged by PEP 8 and silently ignores truthy non-bool values.
     if normalize:
         self.norm = normalizer()
예제 #2
0
import re
import jieba
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm
import h5py
from sklearn.feature_extraction.text import TfidfVectorizer as TFIDF
from sklearn.svm import LinearSVC
from predictor import data
import bz2
from normalize import normalizer
# NOTE(review): presumably a cap on the feature dimension; it is not used in
# the visible part of this file - confirm against the callers.
dim = 500000
# Matches ONE punctuation/symbol character (ASCII and CJK). The literal
# square brackets must be escaped: left bare, the first "]" closes the
# character class early and the rest of the pattern becomes a long literal
# tail, so the expression practically never matches anything.
special_character_removal = re.compile(
    r'[@#$%^&*,.【】\[\]{};‘,。、?!?“”‘’; \\/"\']', re.IGNORECASE)
# Matches a run of one or more digits (IGNORECASE has no effect on \d).
replace_numbers = re.compile(r'\d+', re.IGNORECASE)

# Build the shared normalizer from 'word.txt' (path is relative to the
# current working directory).
# NOTE(review): this rebinds the imported class name ``normalizer`` to the
# instance, so the class can no longer be instantiated from this module.
normalizer = normalizer('word.txt')

# Minimum token length (in characters) kept by ``seg``.
word_len = 2


# cut = thulac.thulac(seg_only=True)
def seg(text):
    """Segment ``text`` with jieba and return the kept tokens space-joined.

    Leading and trailing whitespace is stripped before segmentation, and
    tokens shorter than the module-level ``word_len`` are discarded.
    """
    kept = []
    for token in jieba.cut(text.strip()):
        if len(token) < word_len:
            continue
        kept.append(token)
    return " ".join(kept)


def text_to_wordlist(text):
    """Segment ``text`` with the module-level normalizer; join with spaces.

    NOTE(review): the second argument ``2`` is passed straight through to
    ``seg_one_text``; presumably a minimum word length - confirm against
    the normalizer implementation.
    """
    tokens = normalizer.seg_one_text(text, 2)
    return " ".join(tokens)
    # text = special_character_removal.sub('', text)
    # text = replace_numbers.sub('NUMBERREPLACE', text)
예제 #3
0
파일: main.py 프로젝트: saurabhsuman47/ML
@author: saurabh
"""

import numpy as np
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import r2_score, mean_squared_error
from load_data import load_data
from normalize import normalizer
import os

# Ratings file location, relative to the current working directory.
data_path = os.path.join("..", "ml-100k", "u.data")
# load_data is project-local.
# NOTE(review): presumably X is a users x movies rating matrix and y the
# held-out 10% of ratings - confirm against load_data.
X, y = load_data(data_path, test_percentage=10)
# From here on rows are treated as users and columns as movies.
num_users, num_movies = X.shape

# Boolean mask of strictly positive entries, captured BEFORE X is normalized;
# the loop below uses it as "movies rated by this user".
binary = (X > 0)
norm = normalizer()
# X is replaced by its normalized form; the exact transformation is defined
# by the project-local normalizer.
X = norm.fit_transform(X)

# Independent copy of the normalized matrix.
# NOTE(review): presumably filled with predictions later - not visible here.
predicted = X.copy()

# Elastic net whose alpha is chosen by cross-validation from this list.
clf = ElasticNetCV(alphas=[0.0125, 0.025, 0.05, .1, .125, .5, 1.0, 2.0, 4.0])
#clf = ElasticNetCV(alphas = [ .1])

for user in range(num_users):
    # Boolean row: which movies the current user has rated.
    movie_user = binary[user]
    # Users used as regression attributes: every user except the current one.
    neighbors = np.ones((num_users), dtype=bool)
    neighbors[user] = False
    # Keep only the other users' rows ...
    X_train_user = X[neighbors]
    # ... restricted to the movies this user rated, then transposed so each
    # row is one rated movie and each column one neighbouring user.
    X_train_user = X_train_user[:, movie_user].T
def _split_into_folds(samples, targets, n_folds):
    """Deal rows round-robin into ``n_folds`` parallel sample/target folds."""
    sample_folds = [samples[k::n_folds] for k in range(n_folds)]
    target_folds = [targets[k::n_folds] for k in range(n_folds)]
    return sample_folds, target_folds


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN for a zero denominator."""
    if denominator == 0:
        return float("nan")
    return numerator / denominator


def main():
    """Run repeated 5-fold cross-validation of an MLP on a two-class data set.

    Expects two command-line arguments, both forwarded to ``readin``; the
    second one is then loaded as a comma-separated numeric table. The last
    column of the normalized table is the class label (0 or 1) and the
    remaining columns are the features.

    The rows are shuffled once and dealt into five folds. Training is
    repeated 20 times per fold; the early-stopping error and the confusion
    matrix of every run are collected. Error statistics and the accuracy,
    sensitivity and specificity of the averaged confusion matrix are
    printed to stdout.

    Returns:
        None.
    """
    fin = sys.argv[1]
    fin2 = sys.argv[2]
    readin(fin, fin2)
    nparr = np.loadtxt(fin2, delimiter=',')
    study = normal.normalizer(nparr)

    n_samples = np.shape(study)[0]
    # The label lives in the last column (index 14 of the 15-column table).
    labels = study[:, -1]
    # One-hot targets: column 0 marks class 0, column 1 marks class 1.
    target = np.zeros((n_samples, 2))
    target[labels == 0, 0] = 1
    target[labels == 1, 1] = 1
    # Features as float, matching the float arrays the folds are stacked into.
    study = np.asarray(study[:, :-1], dtype=float)

    # np.random.shuffle needs a mutable sequence; a bare ``range`` object
    # raises TypeError on Python 3.
    order = np.arange(n_samples)
    np.random.shuffle(order)
    study = study[order, :]
    target = target[order, :]

    n_folds = 5
    n_repeats = 20
    # Round-robin dealing keeps every sample (none are skipped when the fold
    # counter wraps around).
    sample_folds, target_folds = _split_into_folds(study, target, n_folds)

    Errors = []
    Cons = []
    for _ in range(n_repeats):
        for held_out in range(n_folds):
            train_ids = [k for k in range(n_folds) if k != held_out]
            x1 = np.vstack([sample_folds[k] for k in train_ids])
            Target1 = np.vstack([target_folds[k] for k in train_ids])
            # Fresh copies so the network can never alter the stored folds.
            x2 = sample_folds[held_out].copy()
            Target2 = target_folds[held_out].copy()
            # NOTE(review): the positional 5 is presumably the hidden-layer
            # size of the project-local mlp - confirm against its signature.
            net = mlp(x1, Target1, 5, momentum=0.1, outtype='softmax')
            Errors.append(net.earlystopping(x1, Target1, x2, Target2, 0.3))
            Cons.append(net.confmat(x2, Target2))
            print("\n")

    print("The CONS", Cons)
    Errors = np.array(Errors)

    Max = np.amax(Errors)
    print("The max error:", Max)

    Min = np.amin(Errors)
    print("the min error:", Min)

    Mean = np.mean(Errors)
    print("the mean error is:", Mean)

    Std = np.std(Errors)
    print("the STD is :", Std)

    # Sum the four cells of every 2x2 confusion matrix. The counters must
    # start at zero before they can be accumulated.
    T1 = F2 = F1 = T2 = 0.0
    for con in Cons:
        T1 += con[0][0]
        F2 += con[0][1]
        F1 += con[1][0]
        T2 += con[1][1]

    n_runs = len(Cons)
    T1 = T1 / n_runs
    F2 = F2 / n_runs
    F1 = F1 / n_runs
    T2 = T2 / n_runs
    Accuracy = _safe_ratio(T1 + T2, T1 + T2 + F1 + F2)
    Sensitivity = _safe_ratio(T1, T1 + F2)
    Specificity = _safe_ratio(T2, T2 + F1)

    print("The Accuracy", Accuracy)
    print("The Sensitivity", Sensitivity)
    print("The Specificity", Specificity)
예제 #5
0
import pandas as pd
import re
import logging
import multiprocessing
from normalize import normalizer
import os
import json
import config as cf

# Configure the root logger: timestamp, level and message, INFO and above.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

# Fixed seed value; where it is consumed is not visible in this chunk.
SOME_FIXED_SEED = 20

# Matches a run of one or more digits (IGNORECASE has no effect on \d).
replace_numbers = re.compile(r'\d+', re.IGNORECASE)
# Shared normalizer built from a word list.
# NOTE(review): absolute, machine-specific Windows path - this only works
# where that exact directory exists.
normalizer_ = normalizer(
    'E:/pycharm/judicial_doc_measurement/utils/lda/word.txt')
# NOTE(review): presumably the minimum token length kept during
# segmentation; not used in the visible code - confirm.
word_len = 2
# Module-level counter initialised to zero; its updates are not visible here.
progress = 0


# Get the legal provisions cited by a single document
def get_law(node):
    law_tmp = ''
    law_list = []
    for subnode in node:
        if subnode.tag == 'FLFTFZ':
            name = ''
            for subsubnode in subnode:
                if subsubnode.tag == 'MC':
                    name = subsubnode.get('value')
                elif subsubnode.tag == 'T':
예제 #6
0
import numpy as np
import pandas as pd
import re
import logging
import multiprocessing
from normalize import normalizer
import os
import json

# Configure the root logger: timestamp, level and message, INFO and above.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Fixed seed value; where it is consumed is not visible in this chunk.
SOME_FIXED_SEED = 20


# Matches a run of one or more digits (IGNORECASE has no effect on \d).
replace_numbers = re.compile(r'\d+', re.IGNORECASE)
# Shared normalizer built from './word.txt' (relative to the current
# working directory, not to this file).
normalizer_ = normalizer('./word.txt')
# NOTE(review): presumably the minimum token length kept during
# segmentation; not used in the visible code - confirm.
word_len = 2
# Module-level counter initialised to zero; its updates are not visible here.
progress = 0


# Get the legal provisions cited by a single document
def get_law(node):
    law_tmp = ''
    law_list = []
    for subnode in node:
        if subnode.tag == 'FLFTFZ':
            name = ''
            for subsubnode in subnode:
                if subsubnode.tag == 'MC':
                    name = subsubnode.get('value')
                elif subsubnode.tag == 'T':
예제 #7
0
from normalize import normalizer
from queue import Queue
import multiprocessing as mp
from method_handlers import *
import commands
import sys
import os

# Module-level flag, initially True; where it is read or cleared is not
# visible in this chunk.
ERROR_FREE_CODE = True

# Exactly one command-line argument (the G-code file path) is required;
# otherwise print the usage line and exit with status 1.
if len(sys.argv) != 2:
    print("Usage: python3 parser.py </path/to/GCode_file>")
    sys.exit(1)

# Runs at import time and discards the return value, so it is called for
# its side effects only.
# NOTE(review): presumably it normalises the given G-code file - confirm
# against the normalize module.
normalizer(sys.argv[1])


class Machine(object):

    ROOM_TEMP = 27

    def __init__(self,
                 instruction_queue,
                 message_queue,
                 current_position=(0, 0, 0),
                 origin=(0, 0, 0),
                 feed_rate=0,
                 extrusion_rate=0,
                 fan_on=False,
                 bed_temp=ROOM_TEMP,