def read_files(directory):
    """
    Read every inkml file in *directory*, extract per-symbol features,
    and save the resulting feature matrix to "<top-dir>.csv".

    :param directory: path to a directory of .inkml files
    :return: None (side effect: writes a CSV named after the directory)
    """
    files = os.listdir(directory)
    print(len(files))
    pre = Preprocessing()
    feature_matrix = []
    total = len(files)
    completed = 0
    gt_c = 0  # running count of ground-truth annotations seen
    for file in files:
        print("Processing file : ", file, " Remaining files : ", total - completed, " Completed files : ", completed)
        # FIX: use a context manager so each file handle is closed;
        # the original opened one handle per file and never closed any.
        with open(os.path.join(directory, file)) as f:
            soup = bs.BeautifulSoup(f, 'html.parser')
        trace_groups = soup.find_all('tracegroup')
        # trace_groups[0] is the whole-expression group; skip it and
        # iterate the per-symbol groups.
        for tracegroup in trace_groups[1:]:
            traceview = tracegroup.find_all('traceview')
            trace_id = []
            for t in traceview:
                trace_id.append(t['tracedataref'])
            gt = tracegroup.annotation.text  # ground-truth symbol label
            gt_c += 1
            X = []
            Y = []
            # Collect the coordinates of every stroke in this symbol.
            for id in trace_id:
                traces = soup.findAll("trace", {'id': id})
                for trace in traces:
                    coords = trace.text.strip().split(",")
                    x = []
                    y = []
                    for coord in coords:
                        trace_parts = coord.strip().split(' ')
                        x.append(float(trace_parts[0]))
                        y.append(float(trace_parts[1]))
                    X.extend(x)
                    Y.extend(y)
            X, Y = pre.dopreprocess(x=X, y=Y)
            ar = pre.get_aspect(X, Y)
            pen = len(trace_id)  # number of pen strokes in the symbol
            feature_matrix.append(extract_features(X, Y, pen, ar, key=gt))
        completed += 1
    df = pd.DataFrame(feature_matrix)
    print("Shape of Matrix ", df.shape, " Total Ground truths in file", gt_c)
    # CSV is named after the first path component of the directory.
    name = directory.strip().split("/")[0]
    df.to_csv(name + ".csv", index=False)
def perfectly_segmented_parser(ink_dir, bonus=False):
    """
    Parse perfectly segmented symbols from inkml files, classify the
    spatial relations between symbols, and write one .lg file per input.

    :param ink_dir: inkml directory
    :param bonus: if True, load the bonus relationship classifier
    :return: None (side effect: writes .lg files into "<top-dir>_output_lg")
    """
    start = time.time()
    # BUG FIX: the original read `dir.strip()`, calling .strip() on the
    # builtin `dir` function (AttributeError). The parameter is `ink_dir`.
    lg_dir = ink_dir.strip().split("/")[0] + "_output_lg"
    if not os.path.exists(lg_dir):
        os.mkdir(lg_dir)
    ink_files = os.listdir(ink_dir)
    if bonus:
        print("Loaded Bonus classifier")
        clf = joblib.load("relation_classifier_bonus.pkl")
    else:
        print("Loaded relationship classifier")
        clf = joblib.load('relation_classifier4.pkl')
    pre = Preprocessing()
    total = len(ink_files)
    c = 0
    gt_c = 0  # running count of ground-truth annotations seen
    for file in ink_files:
        print("Processing file : ", file, " Files remaining : ", total - c, " Files completed : ", c)
        # FIX: context manager closes the handle (original leaked it).
        with open(os.path.join(ink_dir, file)) as f:
            soup = bs.BeautifulSoup(f, 'html.parser')
        trace_groups = soup.find_all('tracegroup')
        symbol_list = []
        # loop to isolate symbols (index 0 is the whole-expression group)
        for tracegroup in trace_groups[1:]:
            traceview = tracegroup.find_all('traceview')
            trace_id = []
            # loop to get strokes in a single symbol
            for t in traceview:
                trace_id.append(t['tracedataref'])
            gt = tracegroup.annotation.text
            gt_c += 1
            X = []
            Y = []
            # extract stroke coordinates; one sub-list per stroke
            for id in trace_id:
                traces = soup.findAll("trace", {'id': id})
                for trace in traces:
                    coords = trace.text.strip().split(",")
                    x = []
                    y = []
                    for coord in coords:
                        trace_parts = coord.strip().split(' ')
                        x.append(float(trace_parts[0]))
                        y.append(float(trace_parts[1]))
                    X.append(x)
                    Y.append(y)
            X, Y = pre.dopreprocess(x=X, y=Y, parser=True)
            if gt == ",":
                # a literal comma would break the comma-separated .lg format
                gt = "COMMA"
            sym_obj = Symbol(x=X, y=Y, label=gt, stroke_id=trace_id)
            symbol_list.append(sym_obj)
        # Assign each symbol its occurrence index among same-labelled symbols.
        symbol_count = {}
        for sym in symbol_list:
            if sym.symbol not in symbol_count:
                symbol_count[sym.symbol] = 1
            else:
                symbol_count[sym.symbol] += 1
            sym.sym_ct = symbol_count[sym.symbol]
        # perform line of sight
        graph, labels = line_of_sight(symbol_list, clf)
        # run edmonds on los graph
        relations = edmonds(graph)
        # write result to lg
        write_to_lg(file=file, symbol_list=symbol_list, relations=relations, labels=labels, lg_dir=lg_dir)
        c += 1
    print("System executed in ", (time.time() - start) / 60, " minutes.")
def train(ink_dir, lg_dir):
    """
    Train the symbol-relationship classifier.

    For each .lg ground-truth file, rebuild every symbol's strokes from the
    matching .inkml file, compute pairwise geometric features for each "EO"
    (edge) record, fit a random forest, and persist it with joblib.

    :param ink_dir: directory of .inkml files
    :param lg_dir: directory of .lg ground-truth files
    :return: None (side effect: writes "relation_classifier_bonus.pkl")
    """
    lg_files = os.listdir(lg_dir)
    pre = Preprocessing()
    feature_matrix = []
    targets = []
    c = 0
    total = len(lg_files)
    for file in lg_files:
        print(file, total - c, c)
        symbols = {}
        # "O" lines declare objects: id, label, and the stroke ids.
        with open(lg_dir + "/" + file) as f:
            for line in f:
                if line.startswith("O"):
                    filt_line = line.strip().split(",")
                    symbols[filt_line[1].strip()] = [
                        filt_line[2], filt_line[4:]
                    ]
        inkml_file = file.replace(".lg", ".inkml")
        with open(ink_dir + "/" + inkml_file) as f:
            soup = bs.BeautifulSoup(f, 'html.parser')
        # Rebuild each symbol's stroke coordinates from the inkml traces.
        for key in symbols:
            label = symbols[key][0]
            strokes = symbols[key][1]
            id_list = []
            X = []
            Y = []
            for id in strokes:
                st_id = id.strip()
                trace = soup.findAll("trace", {'id': st_id})
                coords = trace[0].text.strip().split(",")
                x = []
                y = []
                for coord in coords:
                    trace_parts = coord.strip().split(' ')
                    x.append(float(trace_parts[0]))
                    y.append(float(trace_parts[1]))
                X.append(x)
                Y.append(y)
                id_list.append(st_id)
            X, Y = pre.dopreprocess(x=X, y=Y, parser=True)
            symbols[key] = Symbol(label=label, x=X, y=Y, stroke_id=id_list)
        # relations section: "EO" lines name a labelled symbol pair.
        with open(lg_dir + "/" + file) as f:
            for line in f:
                if line.startswith("EO"):
                    filt_line = line.strip().split(",")
                    sym1 = symbols[filt_line[1].strip()]
                    sym2 = symbols[filt_line[2].strip()]
                    relation = filt_line[3].strip()
                    writing_slope = sym1.writing_slope(sym2)
                    writing_curve = sym1.writing_curvature(sym2)
                    bb_dist = sym1.distance_between_box(sym2)
                    # FIX: corrected local-variable typo "ofsset" -> "offset".
                    distance, horizontal_offset, vertical_distance = sym1.distance_between_average_centres(
                        sym2)
                    max_point_pair = sym1.maximal_point_distance(sym2)
                    feature_matrix.append([
                        writing_slope, writing_curve, bb_dist, distance,
                        horizontal_offset, vertical_distance, max_point_pair
                    ])
                    targets.append(relation)
        c += 1
    print("Shape of Training matrix")
    print(len(feature_matrix), "x", len(feature_matrix[0]))
    print("Unique labels : ", np.unique(targets))
    rf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    rf.fit(X=feature_matrix, y=targets)
    joblib.dump(rf, "relation_classifier_bonus.pkl", protocol=pickle.HIGHEST_PROTOCOL)
    # Reload the dumped model to confirm the pickle round-trips cleanly.
    rf = joblib.load("relation_classifier_bonus.pkl")
    # NOTE(review): accuracy below is measured on the training set itself,
    # so it overstates generalization — use a held-out split to evaluate.
    score = accuracy_score(y_true=targets, y_pred=rf.predict(feature_matrix), normalize=True)
    print("accuracy of model is :", (score * 100))