from normalize import normalizer  # assumed import, matching the sibling modules


def __init__(self, normalize=False):
    self.normalize = normalize
    if self.normalize:
        self.norm = normalizer()
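# Hypothetical usage (the enclosing class is not shown in the source; the
# name TextCleaner is assumed purely for illustration):
#   cleaner = TextCleaner(normalize=True)   # attaches cleaner.norm = normalizer()
#   cleaner = TextCleaner()                 # no .norm attribute is created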
import re
import bz2

import jieba
import h5py
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer as TFIDF
from sklearn.svm import LinearSVC

from predictor import data
from normalize import normalizer

dim = 500000
# [ and ] inside the character class are escaped so the class does not
# terminate early at the bare ]
special_character_removal = re.compile(
    r'[@#$%^&*,.【】\[\]{};‘,。、?!?“”‘’; \\/"\']', re.IGNORECASE)
replace_numbers = re.compile(r'\d+', re.IGNORECASE)
normalizer = normalizer('word.txt')  # module-level instance; rebinds the imported name
word_len = 2  # minimum token length kept by seg()

# cut = thulac.thulac(seg_only=True)


def seg(text):
    """Segment text with jieba, keeping tokens of at least word_len characters."""
    seg_list = jieba.cut(text.strip())
    seg_list = [word for word in seg_list if len(word) >= word_len]
    return " ".join(seg_list)


def text_to_wordlist(text):
    return " ".join(normalizer.seg_one_text(text, 2))
    # text = special_character_removal.sub('', text)
    # text = replace_numbers.sub('NUMBERREPLACE', text)
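# --- Hedged usage sketch (not in the original file) -------------------------
# The imports above imply a TF-IDF + linear-SVM text classifier; this is a
# minimal wiring of seg() into that pipeline. `texts` and `labels` are
# hypothetical placeholders for the caller's corpus.
def train_tfidf_svm(texts, labels):
    vectorizer = TFIDF(max_features=dim)  # dim = 500000, defined above
    features = vectorizer.fit_transform(seg(t) for t in texts)
    clf = LinearSVC()
    clf.fit(features, labels)
    return vectorizer, clf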
"""
@author: saurabh
"""
import os

import numpy as np
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import r2_score, mean_squared_error

from load_data import load_data
from normalize import normalizer

data_path = os.path.join("..", "ml-100k", "u.data")
X, y = load_data(data_path, test_percentage=10)
num_users, num_movies = X.shape
binary = (X > 0)

norm = normalizer()
X = norm.fit_transform(X)
predicted = X.copy()

clf = ElasticNetCV(alphas=[0.0125, 0.025, 0.05, .1, .125, .5, 1.0, 2.0, 4.0])
# clf = ElasticNetCV(alphas=[.1])

for user in range(num_users):
    # boolean mask of the movies rated by this user
    movie_user = binary[user]
    # which users to consider as attributes for the regression:
    # in this case, all except the current user
    neighbors = np.ones((num_users), dtype=bool)
    neighbors[user] = False
    X_train_user = X[neighbors]
    X_train_user = X_train_user[:, movie_user].T
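    # Hedged completion of the loop body (the original file truncates here):
    # fit the ElasticNet on the neighbors' ratings for the movies this user
    # rated, then predict the user's scores for every movie from the same
    # neighbors. Variable names follow the loop above.
    y_train_user = X[user, movie_user]
    clf.fit(X_train_user, y_train_user)
    predicted[user] = clf.predict(X[neighbors].T)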
import sys

import numpy as np

import normal          # assumed local module providing normalizer()
from mlp import mlp    # assumed import; the mlp class is not defined in this file

# readin() is expected to be defined elsewhere in this module (not shown).


def main():
    fin = sys.argv[1]
    fin2 = sys.argv[2]
    readin(fin, fin2)
    input2 = open(fin2, 'r')
    nparr = np.loadtxt(fin2, delimiter=',')
    study = normal.normalizer(nparr)

    # Build one-hot targets from the class column (column 14)
    target = np.zeros((np.shape(study)[0], 2))
    index = np.where(study[:, 14] == 0)
    target[index, 0] = 1
    index = np.where(study[:, 14] == 1)
    target[index, 1] = 1
    study = study[:, :-1]

    # Shuffle samples and targets together (np.arange gives shuffle a mutable array)
    order = np.arange(np.shape(study)[0])
    np.random.shuffle(order)
    study = study[order, :]
    target = target[order, :]

    thelen = 5
    Carray = []
    Tarray = []
    # Confusion-matrix totals, accumulated over all folds below
    T1 = 0
    F1 = 0
    F2 = 0
    T2 = 0
    for i in range(5):
        Carray.append([])
        Tarray.append([])

    # Deal every sample round-robin into the 5 folds
    for i in range(len(study)):
        Carray[i % 5].append(study[i])
        Tarray[i % 5].append(target[i])

    Errors = []
    Cons = []
    for i in range(20):
        for j in range(5):
            # Fold j is held out; the other four folds form the training set
            Target1 = np.empty((0, 2))
            Target2 = np.empty((0, 2))
            x1 = np.empty((0, 14))
            x2 = np.empty((0, 14))
            for k in range(5):
                if k != j:
                    for h in range(len(Carray[k])):
                        x1 = np.vstack((x1, Carray[k][h]))
                        Target1 = np.vstack((Target1, Tarray[k][h]))
            x2 = np.vstack((x2, Carray[j]))
            Target2 = np.vstack((Target2, Tarray[j]))
            net = mlp(x1, Target1, 5, momentum=0.1, outtype='softmax')
            Errors.append(net.earlystopping(x1, Target1, x2, Target2, 0.3))
            num = net.confmat(x2, Target2)
            Cons.append(num)

    print("\n")
    print("The CONS", Cons)
    Errors = np.array(Errors)
    Max = np.amax(Errors)
    print("The max error:", Max)
    Min = np.amin(Errors)
    print("the min error:", Min)
    Mean = np.mean(Errors)
    print("the mean error is:", Mean)
    Std = np.std(Errors)
    print("the STD is :", Std)

    # Accumulate the 2x2 confusion matrices over all runs
    for i in range(len(Cons)):
        for j in range(2):
            if j == 1:
                F1 = F1 + Cons[i][j][0]
                T2 = T2 + Cons[i][j][1]
            if j == 0:
                T1 = T1 + Cons[i][j][0]
                F2 = F2 + Cons[i][j][1]
    T1 = T1 / len(Cons)
    F2 = F2 / len(Cons)
    F1 = F1 / len(Cons)
    T2 = T2 / len(Cons)
    Accuracy = (T1 + T2) / (T1 + T2 + F1 + F2)
    Sensitivity = T1 / (T1 + F2)
    Specificity = T2 / (T2 + F1)
    print("The Accuracy", Accuracy)
    print("The Sensitivity", Sensitivity)
    print("The Specificity", Specificity)
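# Worked sanity check for the aggregate metrics above (assumes confmat()
# returns rows = actual class, columns = predicted class, class 0 first).
# For a single confusion matrix [[40, 10], [5, 45]]:
#   T1 = 40, F2 = 10, F1 = 5, T2 = 45
#   accuracy    = (40 + 45) / 100 = 0.85
#   sensitivity = 40 / (40 + 10) = 0.80
#   specificity = 45 / (45 + 5)  = 0.90

# Hypothetical entry point; the original truncates before one is shown.
if __name__ == '__main__':
    main()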
import pandas as pd
import re
import logging
import multiprocessing
from normalize import normalizer
import os
import json
import config as cf

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

SOME_FIXED_SEED = 20
replace_numbers = re.compile(r'\d+', re.IGNORECASE)
normalizer_ = normalizer(
    'E:/pycharm/judicial_doc_measurement/utils/lda/word.txt')
word_len = 2
progress = 0


# Extract the statute articles cited in a single document
def get_law(node):
    law_tmp = ''
    law_list = []
    for subnode in node:
        if subnode.tag == 'FLFTFZ':
            name = ''
            for subsubnode in subnode:
                if subsubnode.tag == 'MC':
                    name = subsubnode.get('value')
                elif subsubnode.tag == 'T':
import numpy as np
import pandas as pd
import re
import logging
import multiprocessing
from normalize import normalizer
import os
import json

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

SOME_FIXED_SEED = 20
replace_numbers = re.compile(r'\d+', re.IGNORECASE)
normalizer_ = normalizer('./word.txt')
word_len = 2
progress = 0


# Extract the statute articles cited in a single document
def get_law(node):
    law_tmp = ''
    law_list = []
    for subnode in node:
        if subnode.tag == 'FLFTFZ':
            name = ''
            for subsubnode in subnode:
                if subsubnode.tag == 'MC':
                    name = subsubnode.get('value')
                elif subsubnode.tag == 'T':
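# Hedged illustration (schema assumed, not shown in the original): the shape
# of document XML that get_law() walks. Tag meanings are inferred: FLFTFZ is
# a cited-statute block, MC carries the statute name, T the article.
import xml.etree.ElementTree as ET

sample = ET.fromstring(
    '<doc><FLFTFZ>'
    '<MC value="Criminal Law of the PRC"/>'
    '<T value="Article 264"/>'
    '</FLFTFZ></doc>')
for flftfz in sample.iter('FLFTFZ'):
    print(flftfz.find('MC').get('value'))  # -> Criminal Law of the PRC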
from normalize import normalizer
from queue import Queue
import multiprocessing as mp
import subprocess  # the Python 2-only `commands` module is gone in Python 3; subprocess covers its role
import sys
import os

from method_handlers import *

ERROR_FREE_CODE = True

if len(sys.argv) != 2:
    print("Usage: python3 parser.py </path/to/GCode_file>")
    sys.exit(1)

normalizer(sys.argv[1])


class Machine(object):
    ROOM_TEMP = 27

    def __init__(self, instruction_queue, message_queue,
                 current_position=(0, 0, 0), origin=(0, 0, 0),
                 feed_rate=0, extrusion_rate=0, fan_on=False,
                 bed_temp=ROOM_TEMP,
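# Hedged helper sketch (hypothetical; not part of the original parser):
# how one G-code line could be tokenized before being dispatched to the
# method handlers imported above.
def parse_gcode_line(line):
    """Split 'G1 X10.0 Y5.5 F1500' into ('G1', {'X': 10.0, 'Y': 5.5, 'F': 1500.0})."""
    tokens = line.split(';')[0].split()  # drop trailing comments, then split words
    if not tokens:
        return None
    command, params = tokens[0], {}
    for tok in tokens[1:]:
        params[tok[0]] = float(tok[1:])  # axis/parameter letter -> numeric value
    return command, params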