def main():
    """Preprocess bug reports streamed as JSON lines on stdin.

    For each input line, the 'title' and 'description' fields are run
    through the custom tokenizer and replaced by the space-joined tokens;
    the updated report is written back out as one JSON line on stdout.
    """
    # Directory where this script is located; the ignore-term files are
    # expected to live next to it.
    dirname = os.path.dirname(__file__)

    # Collect the terms the tokenizer should ignore, one term per line.
    # Fix: skip blank lines so empty strings never end up in the ignore
    # list (the original kept '' for every blank line in the files).
    ignore_terms = []
    for ignore_file_name in IGNORE_TERMS_FILE_NAMES:
        with open(os.path.join(dirname, ignore_file_name), encoding='utf-8') as file:
            ignore_terms.extend(
                term for term in (line.strip() for line in file) if term)

    # Create our custom tokenizer; it receives the terms we want to ignore.
    preprocessor = Preprocessor(word_chars='a-zA-Z0-9',
                                inter_chars="'",
                                min_length=3,
                                ignore=ignore_terms)

    for line in sys.stdin:
        bug_report = json.loads(line)
        old_title = bug_report['title']
        old_description = bug_report['description']
        bug_report['title'] = ' '.join(preprocessor.preprocess(old_title))
        bug_report['description'] = ' '.join(
            preprocessor.preprocess(old_description))
        print(json.dumps(bug_report))
import time
from datetime import datetime, timedelta

import pandas as pd

from utils import Log
from db import DBProvider
from preprocessing import Preprocessor
from config_provider import config

# Guard the pipeline behind __main__ so merely importing this module does
# not open a database connection and trigger a full preprocessing run as
# a side effect.
if __name__ == "__main__":
    db, log = DBProvider(), Log(config, 'debug')
    preprocessor = Preprocessor(db, log)
    preprocessor.preprocess()
def preprocess(self):
    """Run the dataset through a Preprocessor and cache its outputs.

    Stores the classification targets in ``self.ground_truth`` (as a
    numpy array via ``.values``) and the numerical features in
    ``self.numerical``.
    """
    prep = Preprocessor(self.dataset)
    prep.preprocess()
    self.ground_truth = prep.get_classification_data().values
    self.numerical = prep.get_numerical()
# --- Daily update pipeline: refresh current matches, scrape tomorrow's
# --- future matches, then preprocess everything for downstream use.
# NOTE(review): `now` is not defined in this span — presumably set earlier
# in the file (e.g. datetime.now()); confirm against the full script.
db, log = DBProvider(), Log(config, 'update')
# 04:00 of the current day serves as the cut-off between "current" and
# "future" matches.
now_morning = datetime(now.year, now.month, now.day, 4)
start_time = time.time()
log.debug(f"Clear RAM {config.OS.clear_ram()}")
log.debug(f'Current DB is {config.Database}')
log.debug(f"Update current_matches to {now_morning}")
# Everything up to this morning's cut-off (from_time=None → no lower bound).
updater = CurrentUpdater(LeagueScraper(), MatchScraper(from_time=None, to_time=now_morning), db, log)
updater.update()
log.debug(f"Updated current_matches for {int(time.time() - start_time)} sec")
# Scrape window for future matches: [today 04:00, tomorrow 04:00).
next_day = now + timedelta(days=1)
next_day_morning = datetime(next_day.year, next_day.month, next_day.day, 4)
start_time = time.time()
log.debug(f"Update future_matches from {now_morning} to {next_day_morning}")
updater = FutureUpdater(
    FutureLeagueScraper(),
    MatchScraper(from_time=now_morning, to_time=next_day_morning), db, log)
updater.update()
log.debug(f"Updated future_matches for {int(time.time() - start_time)} sec")
start_time = time.time()
log.debug(f"Clear RAM {config.OS.clear_ram()}")
# Finally, preprocess the newly scraped matches; preprocess() returns the
# number of matches handled (used only for logging here).
preprocessor = Preprocessor(db, log)
log.debug('Preprocess matches')
num_matches = preprocessor.preprocess()
log.debug(
    f'Preprocessed {num_matches} matches for {int(time.time() - start_time)} sec'
)
# from attention import AttentionLayer
import os

import pandas as pd
import tensorflow as tf
# Fix: import the layers from tensorflow.keras directly. The original
# `keras = tf.keras` followed by `from keras.layers import ...` actually
# imported the standalone `keras` package, not tf.keras — the local
# assignment has no effect on `from keras...` imports.
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

if __name__ == "__main__":
    # NOTE(review): `Preprocessor` is not imported in this view — confirm it
    # is imported elsewhere in the file.
    path = "/Users/seungyoungoh/workspace/text_summarization_project/"
    # `on_bad_lines="warn"` replaces `error_bad_lines=False`, which was
    # removed in pandas 2.0; both warn about and skip malformed CSV rows.
    # os.path.join also avoids the double slash the old `path + "/data/..."`
    # concatenation produced.
    data = pd.read_csv(os.path.join(path, "data/sample.csv"),
                       on_bad_lines="warn")
    data = data.rename({
        'body': 'src',
        'key_point': 'smry'
    }, axis='columns')[['src', 'smry']]
    pr = Preprocessor(data)
    src_max_len, smry_max_len, src_vocab, smry_vocab, X_train, X_test, y_train, y_test = pr.preprocess(
    )
    # ### modeling (work in progress — kept for reference)
    # embedding_dim = 128
    # hidden_size = 256
    # # encoder
    # encoder_inputs = Input(shape=(src_max_len,))
    # # encoder embedding layer
    # enc_emb = Embedding(src_vocab, embedding_dim)(encoder_inputs)
    # # encoder LSTM 1
    # encoder_lstm1 = LSTM(hidden_size, return_sequences=True, return_state=True, dropout=0.4, recurrent_dropout=0.4)
    # encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)