def __init__(self, policy_cls, env_id, args):
    logging.getLogger("tensorflow").setLevel(logging.ERROR)
    self.args = args
    self.env = gym.make(env_id, **args2envkwargs(args))
    self.policy_with_value = policy_cls(self.args)
    self.iteration = 0
    if self.args.mode == 'training':
        self.log_dir = self.args.log_dir + '/evaluator'
    else:
        self.log_dir = self.args.test_log_dir
    if not os.path.exists(self.log_dir):
        os.makedirs(self.log_dir)

    self.preprocessor = Preprocessor((self.args.obs_dim, ),
                                     self.args.obs_preprocess_type,
                                     self.args.reward_preprocess_type,
                                     self.args.obs_scale,
                                     self.args.reward_scale,
                                     self.args.reward_shift,
                                     gamma=self.args.gamma)

    self.writer = self.tf.summary.create_file_writer(self.log_dir)
    self.stats = {}
    self.eval_timer = TimerStat()
    self.eval_times = 0
def read(self, filename=None, preprocess=True, **defines):
    """Preprocess, read and parse itp file *filename*.

    Any keywords in *defines* are used to modify the default preprocessor
    variables (see
    :meth:`gromacs.fileformats.preprocessor.Preprocessor.parse` for details).
    Setting *preprocess* = ``False`` skips the preprocessing step.
    """
    self._init_filename(filename)

    if preprocess:
        kwargs = self.defines.copy()
        kwargs['commentchar'] = self.commentchar
        kwargs['clean'] = True
        ppitp = Preprocessor(self.real_filename, **kwargs)
        ppitp.parse(**defines)
        itp = ppitp.StringIO()
    else:
        itp = open(self.real_filename)

    try:
        stream = OneLineBuffer(itp.next)
        self.parse(stream)
    finally:
        itp.close()
def processData(self):
    """ The purpose of this method is to process both train/test raw data """
    # Load the preprocessor
    preprocessor = Preprocessor()

    if self.train:
        filename = self.parameters['data-path'] + self.parameters['train-data-filename']
    else:
        filename = self.parameters['data-path'] + self.parameters['test-data-filename']

    # read the required file
    data_df = pd.read_json(path_or_buf=filename, lines=True)

    # concatenate response and last 'n' contexts together
    data_df['CONTEXT'] = data_df['context'].apply(
        lambda x: ' '.join(x[-self.n_last_context:]))
    data_df['text'] = data_df['CONTEXT'] + ' ' + data_df['response']
    data_df['text'] = data_df['text'].apply(
        lambda x: preprocessor.process_text_bert(x))

    # save the processed data
    if self.train:
        filename = self.parameters['processed-data-path'] + self.parameters['processed-train-data-filename']
        data_df[['text', 'label']].to_csv(filename)
    else:
        filename = self.parameters['processed-data-path'] + self.parameters['processed-test-data-filename']
        data_df[['text']].to_csv(filename)
    return
def load_data(batch_size):
    '''
    Loads training, validation and test data from resources.
    '''
    data_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../resources'))
    data_config = {
        "training": {"size": 0.8},
        "test": {"size": 0.1},
        "validation": {"size": 0.1}
    }
    p = Preprocessor(base_path=data_path, datasets=data_config)
    train_data = p.generate_images('training', shuffle=True, batch_size=batch_size)
    valid_data = p.generate_images('validation', shuffle=False, batch_size=batch_size)
    class_weights = p.get_class_weights()
    return train_data, valid_data, class_weights
def run_imputation(df):
    """
    Fill in missing numeric values using Kalman Filtering, and fill in missing
    string values by drawing from each column's distribution of unique values.
    """
    # Create the datetime index
    time_cols = ['year', 'month', 'day', 'hour']
    df["timestamp"] = pd.to_datetime(df[time_cols])
    df.set_index("timestamp", inplace=True)
    df.drop(columns=time_cols, inplace=True)

    # Check if there are null values in the dataset and get the columns
    nulls = df.isnull().sum()
    null_cols = nulls[nulls > 0].index.values
    numeric_null_cols = get_numeric_null_cols(df, null_cols)
    obj_null_cols = get_string_null_cols(df, null_cols)

    # Use Kalman Filtering to impute missing numeric values
    for col in numeric_null_cols:
        prep = Preprocessor()
        arr = prep.kalman_impute(df[col])
        df[col] = arr
        # Backfill any missing data at the beginning of the array
        if df[col].isnull().sum():
            df[col].fillna(method="bfill", inplace=True)

    # Random draw based on distribution of unique vals in each column
    for col in obj_null_cols:
        arr = fill_missing_strings(df[col])
        df[col] = arr

    return df
def testTennisOrIris(trainDataFile, testDataFile, attrDataFile):
    data = Preprocessor(trainDataFile, testDataFile, attrDataFile)
    data.loadData()
    trainData = data.getMatrix(data.getTrainData())
    testData = data.getMatrix(data.getTestData())

    numInput = data.getNumInput()
    numOutput = len(data.getClasses())
    numHidden = 3
    seed = 4
    learningRate = 0.1
    maxEpochs = 5000
    momentum = 0.0

    print("Generating neural network: %d-%d-%d" % (numInput, numHidden, numOutput))
    nn = NeuralNetwork(numInput, numHidden, numOutput, seed)
    nn.train(trainData, maxEpochs, learningRate, momentum)
    print("Training complete")

    # accTrain = nn.accuracy(trainData)
    accTest = nn.accuracy(testData)
    # print("\nAccuracy on train data = %0.4f " % accTrain)
    print("Accuracy on test data = %0.4f " % accTest)
def __init__(self):
    self._rows = 0
    self._cols = 0
    self._params = {}
    self._model = LogisticRegression(max_iter=20)
    self._preprocessor = Preprocessor()
    self.init_params()
def test_search_lines(self):
    """Various test cases for preprocessor.search_lines."""
    preprocessor = Preprocessor(self.empty_dataframe, self.config, '_INFO_')

    # These lines should get deleted
    no_matches = ['not matching text', 'should get deleted']
    self.assertFalse(preprocessor.search_lines(no_matches))

    # The first line should be kept since it explicitly matches the REs in
    # USEFUL_INFORMATION. The second line is removed: although it matches
    # 'error', it does not match USEFUL_INFORMATION.
    single_match = [
        'this error is USEFUL_INFORMATION', 'but this error is not'
    ]
    self.assertEqual(preprocessor.search_lines(single_match),
                     ['this error is USEFUL_INFORMATION'])

    # Both lines should be kept since both are explicitly matched
    # by both regular expressions in search_lines
    multi_match = [
        'this error is USEFUL_INFORMATION',
        'of course that error is USEFUL_INFORMATION!'
    ]
    self.assertEqual(preprocessor.search_lines(multi_match), multi_match)
def __init__(self, left_filename, right_filename, directory, config):
    super(TestPredictionCallback, self).__init__()
    self.directory = directory

    # Crop and expand dims to batch = 1
    crop_start_row = config['crop_start_row']
    crop_start_col = config['crop_start_col']
    crop_stop_row = crop_start_row + config['crop_height']
    crop_stop_col = crop_start_col + config['crop_width']

    preprocessor = Preprocessor()

    img = img_to_array(load_img(left_filename), data_format='channels_first')
    img = img[:, crop_start_row:crop_stop_row, crop_start_col:crop_stop_col]
    left_img = preprocessor.resize_img(img, [
        config['channels'], config['resized_height'], config['resized_width']
    ])

    img = img_to_array(load_img(right_filename), data_format='channels_first')
    img = img[:, crop_start_row:crop_stop_row, crop_start_col:crop_stop_col]
    right_img = preprocessor.resize_img(img, [
        config['channels'], config['resized_height'], config['resized_width']
    ])

    self.left_img = np.expand_dims(left_img, axis=0)
    self.right_img = np.expand_dims(right_img, axis=0)
def test_restore_matrix_2(self):
    missing_value = -999999
    pre = Preprocessor(missing_value=missing_value)
    threshold = 1e-5
    header = ['col1', 'col2']
    x = np.random.randint(low=0, high=2, size=(5, 2)).astype(str)
    v = np.full(shape=x.shape, fill_value=False)

    m = pre.get_metadata(arr=x, header=header)
    obj_d = pre.get_discretized_matrix(arr=x, meta=m, header=header,
                                       require_missing=True)
    obj_r = pre.restore_matrix(arr=obj_d['x'], meta=m, header=obj_d['header'])

    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            if m[j]['type'] == 'count' or m[j]['type'] == 'continuous':
                if abs(float(x[i, j]) - float(obj_r['x'][i, j])) < threshold:
                    v[i, j] = True
            else:
                if x[i, j] == obj_r['x'][i, j]:
                    v[i, j] = True

    assert v.all()
def test_restore_matrix_4(self):
    missing_value = -999999
    pre = Preprocessor(missing_value=missing_value)
    threshold = 1e-5
    obj_f = self.create_multimodal_object(n=1000)
    v = np.full(shape=obj_f['x'].shape, fill_value=False)

    m = pre.get_metadata(obj_f['x'], obj_f['header'])
    obj_d = pre.get_discretized_matrix(arr=obj_f['x'], meta=m,
                                       header=obj_f['header'],
                                       require_missing=True)
    obj_r = pre.restore_matrix(arr=obj_d['x'], meta=m, header=obj_d['header'])

    for i in range(obj_f['x'].shape[0]):
        for j in range(obj_f['x'].shape[1]):
            if m[j]['type'] == 'count' or m[j]['type'] == 'continuous':
                if abs(float(obj_f['x'][i, j]) - float(obj_r['x'][i, j])) < threshold:
                    v[i, j] = True
            else:
                if obj_f['x'][i, j] == obj_r['x'][i, j]:
                    v[i, j] = True

    assert v.all()
def test_get_variable_type_constant_str(self):
    pre = Preprocessor(missing_value=-999999)
    x = np.full(fill_value='hello world', shape=10000)
    var_type = pre.get_variable_type(arr=x, label='my_feature')
    assert var_type == 'constant'

def test_get_variable_type_continuous(self):
    pre = Preprocessor(missing_value=-999999)
    x = np.random.random(1000)
    var_type = pre.get_variable_type(arr=x, label='my_feature')
    assert var_type == 'continuous'

def test_get_variable_type_constant_num(self):
    pre = Preprocessor(missing_value=-999999)
    x = np.zeros(shape=1000)
    var_type = pre.get_variable_type(arr=x, label='my_feature')
    assert var_type == 'constant'
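# A minimal sketch of the kind of heuristic the get_variable_type tests above
# imply: a single unique value means 'constant', a float-valued array with many
# unique non-integer values means 'continuous'. The 'count' and 'categorical'
# labels mirror the types seen in the restore_matrix tests. This is an
# illustration only, not the actual Preprocessor.get_variable_type implementation.
import numpy as np

def infer_variable_type(arr):
    values = np.unique(arr)
    if len(values) == 1:
        return 'constant'
    try:
        as_float = values.astype(float)
    except ValueError:
        return 'categorical'
    if np.allclose(as_float, np.round(as_float)):
        return 'count'
    return 'continuous'

assert infer_variable_type(np.zeros(1000)) == 'constant'
assert infer_variable_type(np.random.random(1000)) == 'continuous'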
def main(_):
    pre_processor = Preprocessor()
    pre_processor.set_train_test_data(0.8)

    model = Model('winner_predict_model')
    model.learning_rate = 0.01
    model.sess = tf.Session()
    model.builder(team_input_size=pre_processor.team_input_size,
                  player_input_size=pre_processor.player_input_size,
                  output_size=pre_processor.output_size,
                  model_name='model_builder')
    model.run_train(train_epoch=5000,
                    train_x_home_team=pre_processor.train_x_home_team,
                    train_x_away_team=pre_processor.train_x_away_team,
                    train_x_home_player=pre_processor.train_x_home_player,
                    train_x_away_player=pre_processor.train_x_away_player,
                    train_y=pre_processor.train_y,
                    keep_prob=0.7,
                    print_num=500)
    model.run_test(test_x_home_team=pre_processor.test_x_home_team,
                   test_x_away_team=pre_processor.test_x_away_team,
                   test_x_home_player=pre_processor.test_x_home_player,
                   test_x_away_player=pre_processor.test_x_away_player,
                   test_y=pre_processor.test_y)
    model.closer()
def __init__(self, conf_path, template_path):
    configuration = Configuration(conf_path)
    self.preprocessor = Preprocessor(configuration)
    self.poco_processor = PocoProcessor(configuration)
    self.ros_mapper_processor = RosMapperProcessor(configuration)
    self.ros_msg_processor = RosMsgProcessor(configuration)
    self.dds_mapper_processor = DdsMapperProcessor(configuration)
    self.dds_idl_processor = DdsIdlProcessor(configuration)
    self.zmq_serializer_processor = ZmqSerializerProcessor(configuration)
    self.node_handler_processor = NodeHandlerProcessor(configuration)

    env = Environment(loader=FileSystemLoader(template_path))
    self.poco_template = env.get_template('poco_template.h')
    self.ros_mapper_template = env.get_template('ros_mapper_template.h')
    self.ros_msg_template = env.get_template('ros_template.msg')
    self.dds_mapper_template = env.get_template('dds_mapper_template.h')
    self.dds_idl_template = env.get_template('dds_template.idl')
    self.zmq_serializer_template = env.get_template('zmq_serializer_template.h')
    self.node_handler_template = env.get_template('node_handler_template.js')
def test_parent_class(self):
    configuration = Configuration(CONF_PATH)
    preprocessor = Preprocessor(configuration)
    ros_msg_processor = RosMsgProcessor(configuration)

    class_definition_dict = {}

    kidl_file = "class_with_ros_mdlw_and_parent_class.yaml"
    with open("%s%s" % (INCLUDE_PATH, kidl_file), 'r') as stream:
        try:
            class_definition_data = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
    class_definition = preprocessor.process(class_definition_data, False)
    class_definition_dict[class_definition.class_name] = class_definition

    kidl_file = "basic_class_with_ros_mdlw.yaml"
    with open("%s%s" % (INCLUDE_PATH, kidl_file), 'r') as stream:
        try:
            class_definition_data = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
    class_definition = preprocessor.process(class_definition_data, False)
    class_definition_dict[class_definition.class_name] = class_definition

    ros_msg_definition = ros_msg_processor.process(
        'kpsr::codegen::ClassWithParentClass', class_definition_dict)
    print(ros_msg_definition)
def preprocessData(self, train_data, test_data):
    # Preprocessor
    preprocessor = Preprocessor()

    # Make preprocessing path if it doesn't exist
    if not os.path.exists(self.preprocessing_path):
        os.mkdir(self.preprocessing_path)

    # Check if the preprocessed training artifact exists
    if os.path.exists(os.path.join(self.preprocessing_path, 'train_data.txt')):
        # Load train data if it does
        train_data = open(os.path.join(self.preprocessing_path,
                                       'train_data.txt')).read().splitlines()
    else:
        # Preprocess the data as specified in the config file
        for step in self.config['preprocessing']:
            train_data = preprocessor.process(step, train_data)
        # Save the training data artifact
        with open(os.path.join(self.preprocessing_path, 'train_data.txt'), 'w+') as f:
            # Write the array with each datapoint on a new line
            f.write('\n'.join(train_data))
            f.close()

    # Check if the preprocessed testing artifact exists
    if os.path.exists(os.path.join(self.preprocessing_path, 'test_data.txt')):
        # Load test data if it does
        test_data = open(os.path.join(self.preprocessing_path,
                                      'test_data.txt')).read().splitlines()
    else:
        # Preprocess the data as specified in the config file
        for step in self.config['preprocessing']:
            test_data = preprocessor.process(step, test_data)
        # Save the testing data artifact
        with open(os.path.join(self.preprocessing_path, 'test_data.txt'), 'w+') as f:
            # Write the array with each datapoint on a new line
            f.write('\n'.join(test_data))
            f.close()

    return train_data, test_data
def test_word_filter(self):
    """Tests pertaining to preprocessor.filter_words."""
    preprocessor = Preprocessor(self.empty_dataframe, self.config, '_INFO_')
    sample_string = 'Some error information here, testIgnoreWord'
    self.assertEqual(preprocessor.filter_words(sample_string),
                     'Some error information here, ')
def main(mode, other_args):
    if mode != 'build' and mode != 'detect':
        raise Exception('Unknown execution mode: {}'.format(mode))

    with open("config/config.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)

    td = TrendDetector(cfg)

    if mode == 'build':
        # Build model
        sl = SearchLoader(cfg)
        df = sl.load()
        pp = Preprocessor(df)
        agg_df = pp.run()
        td.build(agg_df)

        # Detect trending for all queries on the last day
        max_date = agg_df['date'].max()
        for _, row in agg_df[agg_df['date'] == max_date].iterrows():
            query = row['query']
            count = row['count']
            td.is_trending(query, count)
    else:  # 'detect' mode
        # Load model
        td.load_model()
        # Detect trending for the given query and search count
        query = other_args.query
        obs = other_args.obs
        td.is_trending(query, obs, verbose=True)
def test_basic(self):
    configuration = Configuration(CONF_PATH)
    preprocessor = Preprocessor(configuration)
    ros_msg_processor = RosMsgProcessor(configuration)

    class_definition_dict = {}

    kidl_file = "basic_class_with_ros_mdlw.yaml"
    with open("%s%s" % (INCLUDE_PATH, kidl_file), 'r') as stream:
        try:
            class_definition_data = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
    class_definition = preprocessor.process(class_definition_data, False)
    class_definition_dict[class_definition.class_name] = class_definition

    ros_msg_definition = ros_msg_processor.process('BasicClass', class_definition_dict)

    env = Environment(loader=FileSystemLoader(TEMPLATE_PATH))
    template = env.get_template('ros_template.msg')
    print(template.render(definition=ros_msg_definition))
def externalVoodoo(input, output, linkTo, pathToRemoveFromIdentifier="", trace=False):
    inputLines = _readLinesOfFile(input)
    perFileSettings = PerFileSettings(inputLines)
    preprocessor = Preprocessor(linkTo, output, inputLines, pathToRemoveFromIdentifier)
    out = preprocessor.externalHeader()
    out += '#include "VoodooConfiguration.h"\n'
    out += '#include <VoodooCommon/Common.h>\n\n'
    out += "namespace External\n{\n\n"
    iterator = VoodooMultiplexerIterator(perFileSettings)
    iterator.process(input)
    out += iterator.iter()
    out += "\n}\n\n"
    out += preprocessor.externalSwitchToExpectation()
    out += '#include "VoodooCommon/All.h"\n\n'
    out += "namespace External\n{\n\n"
    out += iterator.expect()
    out += "\n}\n\n"
    out += preprocessor.externalFooter()
    return out
def main(testpath, path_to_result):
    # load config
    with open('config.json') as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open('embedding.pkl', 'rb') as f:
        embedding = pickle.load(f)
    # load embedding
    config['model_parameters']['embedding'] = embedding.vectors

    preprocessor = Preprocessor(None)
    # update embedding used by preprocessor
    preprocessor.embedding = embedding

    logging.info('Processing test data from {}'.format(testpath))
    # get dataset
    test = preprocessor.get_dataset(
        testpath, 6, {'n_positive': -1, 'n_negative': -1, 'shuffle': False})
    test.shuffle = False

    # make model
    PredictorClass = ExamplePredictor
    predictor = PredictorClass(metrics=[], **config['model_parameters'])

    # load model (log the file that is actually loaded)
    model_path = 'model.pkl.4'
    logging.info('loading model from {}'.format(model_path))
    predictor.load(model_path)

    logging.info('predicting...')
    predicts = predictor.predict_dataset(test, test.collate_fn)

    # save csv
    write_predict_csv(predicts, test, path_to_result)
def __init__(self, environment, agent, train, action_freq=1):
    self.env = environment
    self.agent = agent
    self.prep = Preprocessor(self.env.get_dim(Preprocessor.NB_STATE_HISTORY))
    self.trainer = agent.get_trainer() if train else None
    self.action_freq = action_freq
def validate(model: Model, loader: DataLoaderIAM, line_mode: bool) -> Tuple[float, float]:
    """Validates NN."""
    print('Validate NN')
    loader.validation_set()
    preprocessor = Preprocessor(get_img_size(line_mode), line_mode=line_mode)
    num_char_err = 0
    num_char_total = 0
    num_word_ok = 0
    num_word_total = 0
    while loader.has_next():
        iter_info = loader.get_iterator_info()
        print(f'Batch: {iter_info[0]} / {iter_info[1]}')
        batch = loader.get_next()
        batch = preprocessor.process_batch(batch)
        recognized, _ = model.infer_batch(batch)

        print('Ground truth -> Recognized')
        for i in range(len(recognized)):
            num_word_ok += 1 if batch.gt_texts[i] == recognized[i] else 0
            num_word_total += 1
            dist = editdistance.eval(recognized[i], batch.gt_texts[i])
            num_char_err += dist
            num_char_total += len(batch.gt_texts[i])
            print('[OK]' if dist == 0 else '[ERR:%d]' % dist,
                  '"' + batch.gt_texts[i] + '"', '->', '"' + recognized[i] + '"')

    # print validation result
    char_error_rate = num_char_err / num_char_total
    word_accuracy = num_word_ok / num_word_total
    print(f'Character error rate: {char_error_rate * 100.0}%. Word accuracy: {word_accuracy * 100.0}%.')
    return char_error_rate, word_accuracy
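# Hedged sketch of the character-error-rate aggregation used in validate()
# above: total edit distance divided by total ground-truth characters, using
# the same editdistance package. The strings below are made-up examples, not
# data from the IAM loader.
import editdistance

def char_error_rate(recognized, ground_truth):
    errors = sum(editdistance.eval(r, g) for r, g in zip(recognized, ground_truth))
    total = sum(len(g) for g in ground_truth)
    return errors / total if total else 0.0

print(char_error_rate(['hello', 'world'], ['hallo', 'world']))  # 0.1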
def voodoo(input, output, pathToRemoveFromIdentifier, voodooDBFile, includes,
           defines, preIncludes, trace=False):
    inputLines = _readLinesOfFile(input)
    perFileSettings = PerFileSettings(inputLines)
    preprocessor = Preprocessor(input, output, inputLines, pathToRemoveFromIdentifier)
    out = preprocessor.header()
    out += '#include <VoodooCommon/Common.h>\n\n'
    iterator = VoodooMultiplexerIterator(perFileSettings, voodooDBFile)
    iterator.process(input, includes=includes, defines=defines, preIncludes=preIncludes)
    out += iterator.iter()
    out += preprocessor.switchToExpectation()
    out += '#include "VoodooCommon/All.h"\n\n'
    out += iterator.expect()
    out += preprocessor.footer()
    return out
def __init__(self, data_dir, coord, symbol_list, year_range, symbol_first,
             data_win_len, receptive_field, queue_size=500):
    # system initialize
    self.db_manager = DBManager(data_dir)
    self.preprocessor = Preprocessor()
    self.coord = coord
    self.threads = []

    # processing params
    self.data_dir = data_dir
    self.symbol_list = symbol_list
    self.year_range = year_range
    self.symbol_first = symbol_first
    self.data_win_len = data_win_len
    self.receptive_field = receptive_field

    # queue setup
    self.trans_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.trans_queue = tf.PaddingFIFOQueue(queue_size, ['float32'], shapes=[(None, 1)])
    self.trans = self.trans_queue.enqueue([self.trans_placeholder])

    # for multithreading:
    self.yield_list = itertools.product(
        self.symbol_list, self.year_range) if self.symbol_first else itertools.product(
        self.year_range, self.symbol_list)
def load(self, filename):
    with open(filename, 'rb') as f:
        param_dict = pickle.load(f)

    self.min_word_counts = param_dict['min_word_counts']
    self.dtype = param_dict['dtype']
    self.max_df = param_dict['max_df']
    self.min_df = param_dict['min_df']
    self.vocabulary = param_dict['vocabulary']
    self.word_to_ind = param_dict['word_to_ind']
    self.ngram_range = param_dict['ngram_range']
    self.doc_cleaner_pattern = param_dict['doc_cleaner_pattern']
    self.token_pattern = param_dict['token_pattern']
    self.stop_words = param_dict['stop_words']
    self.document_cleaner_func = param_dict['document_cleaner_func']
    self.tokenizer_func = param_dict['tokenizer_func']
    self.token_cleaner_func = param_dict['token_cleaner_func']

    self.preprocessor = Preprocessor(
        doc_cleaner_pattern=self.doc_cleaner_pattern,
        token_pattern=self.token_pattern,
        document_cleaner_func=self.document_cleaner_func,
        tokenizer_func=self.tokenizer_func,
        token_cleaner_func=self.token_cleaner_func,
        stop_words=self.stop_words)
    self.preprocessor.fit()
def __init__(self, model_name="test.hdf5"):
    print('Starting test of {}'.format(model_name))
    models_path = path.abspath(path.join(__file__, "../../..")) + "/models/" + model_name
    print(models_path)
    self.model = load_model(models_path)
    self.preprocessor = Preprocessor()
def setUpClass(self):
    self.DEBUG = False
    self.METRICS = False
    self.data_api_impl = DataApi('../../../data/')
    self.cross_validator_impl = CrossValidator()
    self.preprocessor_impl = Preprocessor()
def process(self, file):
    Preprocessor.process(self, file)
    ir = InputReader(file)
    ir.read()
    cqpf = CQPFormat(ir.getText())
    pos = cqpf.getColumn(self.column)
    for i in range(2, len(pos)):  # ignore first two pos ...
        uni = (pos[i])[0:3]
        bi = (pos[i - 1])[0:3] + "_" + uni
        tri = (pos[i - 2])[0:3] + "_" + bi
        if uni not in self.unilexicon:
            self.unilexicon[uni] = 0
        self.unilexicon[uni] += 1
        if bi not in self.bilexicon:
            self.bilexicon[bi] = 0
        self.bilexicon[bi] += 1
        if tri not in self.trilexicon:
            self.trilexicon[tri] = 0
        self.trilexicon[tri] += 1
        self.count += 1
def test_with_builder(self):
    configuration = Configuration(CONF_PATH)
    preprocessor = Preprocessor(configuration)
    poco_processor = PocoProcessor(configuration)

    class_definition_dict = {}

    kidl_file = "basic_class_with_builder.yaml"
    with open("%s%s" % (INCLUDE_PATH, kidl_file), 'r') as stream:
        try:
            class_definition_data = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)
    class_definition = preprocessor.process(class_definition_data, False)
    class_definition_dict[class_definition.class_name] = class_definition

    poco_definition = poco_processor.process('BasicClassWithBuilder',
                                             class_definition_dict, '')

    env = Environment(loader=FileSystemLoader(TEMPLATE_PATH))
    template = env.get_template('poco_template.h')
    print(template.render(definition=poco_definition))
def __init__(self):
    self.unilexicon = {}
    self.bilexicon = {}
    self.trilexicon = {}
    self.count = 0
    self.column = 1
    Preprocessor.__init__(self)
def process(self, file):
    Preprocessor.process(self, file)
    ir = InputReader(file)
    ir.read()
    cqpf = CQPFormat(ir.getText())
    for word in cqpf.getColumn(self.column):
        if word not in self.lexicon:
            self.lexicon[word] = 0
        self.lexicon[word] += 1
        self.count += 1
def _test_bg_subtraction2(self):
    p = Preprocessor(10)
    s = p.load_npy('./test.npy')
    generator = DataPreparator("", "", 512)
    samples1 = len(s[0, :])
    snew, sbg = generator.bg_subtraction(s)
    samples2 = len(snew[0, :])
    self.assertGreater(samples1, samples2)
def voodooExpectHeader(input, output, pathToRemoveFromIdentifier, voodooDBFile,
                       includes, defines, preIncludes, trace=False):
    inputLines = _readLinesOfFile(input)
    perFileSettings = PerFileSettings(inputLines)
    preprocessor = Preprocessor(input, output, inputLines, pathToRemoveFromIdentifier)
    iterator = VoodooMultiplexerIterator(perFileSettings, voodooDBFile)
    iterator.process(input, includes=includes, defines=defines, preIncludes=preIncludes)
    out = preprocessor.headerOfHeader() + '\n'
    return out
def main(args):
    print("Athene Preprocessor v. 0.1")
    if len(args) > 2:
        source_filename, target_filename, include_filenames = parse_args(args)
        if "" == source_filename or "" == target_filename:
            print("arguments error")
            return
        preprocessor = Preprocessor(source_filename, target_filename, include_filenames)
        preprocessor.run()
        print("ok!")
    else:
        print("Usage:")
        print("\tathp -s source -t target [-i file file file]")
def track(self):
    print sys.argv
    cam = cv2.VideoCapture(int(sys.argv[1]))
    cam.set(cv2.cv.CV_CAP_PROP_FRAME_WIDTH, 640)
    cam.set(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT, 480)
    self._initialize_windows()
    p = Preprocessor()
    hs = HandSegment()
    positions = []
    self.count = 0
    self.skip_frames = 0
    x, y, w, h = 0, 0, 0, 0
    prev_x, prev_y, prev_w, prev_h = 0, 0, 0, 0
    while True:
        frame = self.get_frame(cam)
        if type(frame) == type(None):
            continue
        p.process(frame)
        hand = self.get_biggest_hand(frame, prev_x, prev_y, prev_w, prev_h)
        # print hand
        if not hand == []:
            x, y, w, h = hand
            prev_x, prev_y, prev_w, prev_h = hand
            centerx = x + w / 2
            centery = y + h / 2
            # Drawing rectangle around the hand
            cv2.rectangle(frame, (x, y), (x + w, y + w), (0, 0, 0), 1)
            # pointerx, pointery = hs.get_pointer(frame, x, y, w, h)
            # cv2.imshow("pointer", frame[max(y-h, 0):y+h, x:x+w+w/4])
        else:
            x, y, w, h = -1, -1, prev_w, prev_h
            centerx = -1
            centery = -1
        positions.append([centerx, centery])
        # Action
        skip_frames = self.motion(positions, w, h)
        # Drawing line of motion
        self._draw_motion(frame, positions)
        cv2.imshow("display", frame)
        ch = 0xFF & cv2.waitKey(1)
        if ch == 27:
            break
    cv2.destroyAllWindows()
def main():
    # create lexer and parser instances:
    lexicon_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "lexicon")
    grammar_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grammar")
    lexer = Lexer(lexicon_file, False)
    parser = Parser(grammar_file, lexer.lexicon_dict.keys())

    # run tests:
    for test in tests:
        # create preprocessor instance
        preprocessor_instance = Preprocessor(prefix + test)
        chunks = preprocessor_instance.get_chunks()
        ok = try_parse_program(chunks, lexer, parser)
        print("test " + test + " " + ("PASSED" if ok else "FAILED"))
def __init__(self, data, label, verbose=False, verbosity_level=5):
    print('Preprocessing data...')
    self.__prep = Prep()
    self.__data = []  # initialize before appending the processed items
    for item in data:
        self.__data.append(self.__prep.process(item))
    self.__label = label
    self.__precision = []
    self.__verbose = verbose
    self.__level = verbosity_level
def process(self, file):
    Preprocessor.process(self, file)
    ir = InputReader(file)
    ir.read()
    cqpf = CQPFormat(ir.getText())
    pos = cqpf.getColumn(self.column)
    for i in range(2, len(pos)):  # ignore first two pos ...
        uni = (pos[i])[0:3]
        bi = (pos[i - 1])[0:3] + "_" + uni
        tri = (pos[i - 2])[0:3] + "_" + bi
        self.counts[self.posdict[uni]][self.filecount] += 1
        self.counts[self.posdict[bi]][self.filecount] += 1
        self.counts[self.posdict[tri]][self.filecount] += 1
        self.count += 1
    for x in self.posnames:
        self.counts[self.posdict[x]][self.filecount] /= float(len(pos) - 3)
    self.filecount += 1
def contains_preprocessor_constructs(self):
    """Check if the file makes use of any preprocessor constructs.

    The test is done by running the file through the
    :class:`~gromacs.fileformats.preprocessor.Preprocessor` (while stripping
    all empty lines and all lines starting with a comment character). This is
    compared to the original file, stripped in the same manner. If the two
    stripped files differ from each other then the preprocessor altered the
    file, preprocessor directives must have been involved, and this function
    returns ``True``.

    .. versionadded: 0.3.1
    """
    from itertools import izip

    kwargs = self.defines.copy()
    kwargs['commentchar'] = self.commentchar
    kwargs['clean'] = True
    kwargs['strip'] = True
    ppitp = Preprocessor(self.real_filename, **kwargs)
    ppitp.parse()
    pp_lines = ppitp.StringIO().readlines()

    def strip_line(line):
        s = line.strip()
        return len(s) == 0 or s.startswith(self.commentchar)

    raw_lines = [line for line in open(self.real_filename) if not strip_line(line)]

    if len(pp_lines) != len(raw_lines):
        self.logger.debug("File %r is preprocessed (pp: %d vs raw %d lines (stripped))",
                          self.real_filename, len(pp_lines), len(raw_lines))
        return True
    for linenum, (raw, pp) in enumerate(izip(raw_lines, pp_lines)):
        if raw != pp:
            self.logger.debug("File %r is preprocessed. Difference at (stripped) line %d",
                              self.real_filename, linenum)
            self.logger.debug("preprocessed: %s", pp)
            self.logger.debug("original:     %s", raw)
            return True
    self.logger.debug("File %r does not appear to contain recognized preprocessing directives",
                      self.real_filename)
    return False
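# Hedged illustration of the comparison strategy described in the docstring
# above: strip blank and comment lines from both the raw and the preprocessed
# text, then report whether they differ. Plain in-memory strings stand in for
# real topology files; this is not the gromacs implementation itself.
def _strip(lines, commentchar=';'):
    return [l for l in lines if l.strip() and not l.strip().startswith(commentchar)]

raw = ['; topology', '#ifdef POSRES', '#include "posre.itp"', '#endif', '']
preprocessed = ['']  # with POSRES undefined, the #ifdef block drops out
print(_strip(raw) != _strip(preprocessed))  # True -> preprocessor constructs present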
def get_important_vars(cfg, dat):
    '''
    This method does Feature Selection.
    '''
    # Balances the dataset
    idxs_pos = dat[cfg['target']] == 1
    pos = dat[idxs_pos]
    neg = dat[dat[cfg['target']] == 0][1:sum(idxs_pos)]
    # Concatenates pos and neg, it's already shuffled
    sub_dat = pos.append(neg, ignore_index=True)

    # Imputes the data and fills in the missing values
    sub_dat = Preprocessor.fill_nans(sub_dat)

    # Changes categorical vars to a numerical form
    X = pd.get_dummies(sub_dat)

    #### Correlation-based Feature Selection ####
    # Computes correlation between cfg['target'] and the predictors
    target_corr = X.corr()[cfg['target']].copy()
    target_corr.sort(ascending=False)

    # Sorts and picks the first x features
    # TODO: get optimal x value automatically
    tmp = abs(target_corr).copy()
    tmp.sort(ascending=False)
    important_vars = [tmp.index[0]]
    important_vars.extend(list(tmp.index[2:52]))  # removes other target

    #### Variance-based Feature Selection ####
    #sel = VarianceThreshold(threshold = 0.005)
    #X_new = sel.fit_transform(X)

    #### Univariate Feature Selection ####
    #y = X.TARGET_B
    #X = X.drop("TARGET_B", axis = 1)
    #X_new = SelectKBest(chi2, k = 10).fit_transform(X.values, y.values)

    #### Tree-based Feature Selection ####
    #clf = ExtraTreesClassifier()
    #X_new = clf.fit(X.values, y.values).transform(X.values)
    #aux = dict(zip(X.columns, clf.feature_importances_))
    #important_vars = [i[0] for i in sorted(
    #    aux.items(), key = operator.itemgetter(0))]

    return important_vars
def _createWidgets(self):
    self.SetBackgroundColour((60, 60, 60))
    self.SetForegroundColour((230, 230, 230))
    self.processSysIncCb = wx.CheckBox(self, -1, u"Process #include <...> files")
    self.processSysIncCb.SetBackgroundColour((100, 100, 100))
    sysIncDirs, appIncDirs = Preprocessor.getDefaultIncDirs()
    self._createSysIncWidgets(sysIncDirs)
    self._createAppIncWidgets(appIncDirs)
    self._createPredefMacroWidgets()
    self._createSaveOptionWidgets()
def __init__(self, files_path='', classes={}, out_file='output.csv'):
    # self.mi_terms looks like this: {'term1': {'d': 3, 't': 4, 'mi': 2}, }
    self.mi_terms = {}
    # self.mi_classes looks like this: {'d': 3, 't': 4}
    self.mi_classes = {}
    self.total_terms_count = 0
    # Some configuration
    self.files_path = files_path
    self.out_file = out_file
    self.classes = classes
    self.files_prefixes = classes.keys()
    self.class_names = [classes[prefix] for prefix in classes]
    # For tokenizing, stemming, etc.
    self.prep = Preprocessor(pattern='\W+', lower=True, stem=False,
                             stemmer_name='porter', pos=False, ngram=1)
def __init__(self, valid_actions, run_id, display_screen, skip_frames, game_ROM):
    """
    Initialize ALE class. Creates the FIFO pipes, launches ./ale and does
    the "handshake" phase of communication

    @param display_screen: bool, whether to show the game on screen or not
    @param skip_frames: int, number of frames to skip in the game emulator
    @param game_ROM: location of the game binary to launch with ./ale
    """
    self.display_screen = display_screen
    self.skip_frames = skip_frames
    self.game_ROM = game_ROM
    self.run_id = run_id

    #: create FIFO pipes
    os.mkfifo("ale_fifo_out_%i" % self.run_id)
    os.mkfifo("ale_fifo_in_%i" % self.run_id)

    #: launch ALE with appropriate commands in the background
    command = ('./ale/ale -max_num_episodes 0 -game_controller fifo_named '
               '-disable_colour_averaging true -run_length_encoding false '
               '-frame_skip ' + str(self.skip_frames) + ' -run_id ' + str(self.run_id) +
               ' -display_screen ' + self.display_screen + " " + self.game_ROM + " &")
    os.system(command)
    os.system('ls -l ale_fifo_out_%i' % self.run_id)
    os.system('ls -l ale_fifo_in_%i' % self.run_id)

    #: open communication with pipes
    self.fin = open('ale_fifo_out_%i' % self.run_id)
    self.fout = open('ale_fifo_in_%i' % self.run_id, 'w')

    input = self.fin.readline()[:-1]
    size = input.split("-")  # saves the image sizes (160*210) for breakout

    #: first thing we send to ALE is the output options- we want to get only
    #  image data and episode info (hence the zeros)
    self.fout.write("1,0,0,1\n")
    self.fout.flush()  # send the lines written to pipe

    #: initialize the variables that we will start receiving from ./ale
    self.next_image = []
    self.game_over = True
    self.current_points = 0
    self.actions = [self.all_actions[i] for i in valid_actions]

    #: initialise preprocessor
    self.preprocessor = Preprocessor()
def _createPredefMacroWidgets(self):
    style = wx.LC_REPORT  # |wx.LC_VRULES |wx.LC_HRULES
    self.predefMacroLc = wx.ListCtrl(self, -1, style=style)
    self.predefMacroLc.InsertColumn(0, 'Name')
    self.predefMacroLc.InsertColumn(1, 'Value')
    self.predefMacroLc.SetBackgroundColour((30, 30, 30))
    self.predefMacroLc.SetForegroundColour((30, 30, 30))
    f = self.predefMacroLc.GetFont()
    f.SetFaceName("Monospace")
    self.predefMacroLc.SetFont(f)
    for name, val in sorted(Preprocessor.getPredefMacros().items(), key=lambda i: i[0]):
        idx = self.predefMacroLc.InsertStringItem(sys.maxint, name)
        self.predefMacroLc.SetStringItem(idx, 1, val)
        self.predefMacroLc.SetItemTextColour(idx, (255, 255, 255))
    self.predefAddBtn = wx.Button(self, -1, u"Add")
    self.predefEditBtn = wx.Button(self, -1, u"Edit")
    self.predefDelBtn = wx.Button(self, -1, u"Delete")
def __init__(self, label_path, label_bg_path, meta_path, training_description):
    self.label_path = label_path
    self.label_bg_path = label_bg_path
    self.meta_path = meta_path
    self.training_description = training_description
    if not os.path.isdir(self.training_description):
        os.mkdir(self.training_description)
    self.batch_size = 64
    self.queue_size = 2048
    self.nr_epoch = 10
    self.preprocessor = Preprocessor(10)
    self.augmenter = AugmentTransform(10, 10)
    self.inverse_labels = {}
    self.inverse_labels_bg = {}
    self.train_val_ratio = 0.1
def __init__(self, memory, display_screen="true", skip_frames=4,
             game_ROM='../libraries/ale/roms/breakout.bin'):
    """
    Initialize ALE class. Creates the FIFO pipes, launches ./ale and does
    the "handshake" phase of communication

    @param memory: memoryD, reference to the instance of class memoryD that
        collects all transitions in the game
    @param display_screen: bool, whether to show the game on screen or not
    @param skip_frames: int, number of frames to skip in the game emulator
    @param game_ROM: location of the game binary to launch with ./ale
    """
    self.display_screen = display_screen
    self.skip_frames = skip_frames
    self.memory = memory
    self.game_ROM = game_ROM

    #: create FIFO pipes
    os.system("mkfifo ale_fifo_out")
    os.system("mkfifo ale_fifo_in")

    #: launch ALE with appropriate commands in the background
    command = ('./../libraries/ale/ale -max_num_episodes 0 -game_controller fifo_named '
               '-disable_colour_averaging true -run_length_encoding false '
               '-frame_skip ' + str(self.skip_frames) +
               ' -display_screen ' + self.display_screen + " " + self.game_ROM + " &")
    os.system(command)

    #: open communication with pipes
    self.fin = open('ale_fifo_out')
    self.fout = open('ale_fifo_in', 'w')

    input = self.fin.readline()[:-1]
    size = input.split("-")  # saves the image sizes (160*210) for breakout

    #: first thing we send to ALE is the output options- we want to get only
    #  image data and episode info (hence the zeros)
    self.fout.write("1,0,0,1\n")
    self.fout.flush()  # send the lines written to pipe

    #: initialize the variables that we will start receiving from ./ale
    self.next_image = []
    self.game_over = True
    self.current_reward = 0

    #: initialise preprocessor
    self.preprocessor = Preprocessor()
def setUp(self):
    self.mock_metadata_helper = MagicMock(spec=MetadataHelper)

    self.mock_image_open_patcher = patch('preprocessor.Image.open')
    self.mock_image_open = self.mock_image_open_patcher.start()
    self.mock_image = MagicMock()
    self.mock_exif_data = 'a bunch of exif data'
    self.mock_image.info = {'exif': self.mock_exif_data}
    self.mock_image_open.return_value = self.mock_image

    self.mock_first_transposed_image = MagicMock()
    self.mock_image.transpose.return_value = self.mock_first_transposed_image
    self.mock_second_transposed_image = MagicMock()
    self.mock_first_transposed_image.transpose.return_value = self.mock_second_transposed_image

    self.test_model = Preprocessor(self.mock_metadata_helper)
def _load_text_preprocessor(self, args):
    """ Load the preprocessor for the context """
    self._update_status(6)
    self.text_preprocessor = Preprocessor()

    # Load preprocessors based on type of model
    ## TF-IDF:
    if "tfidf" in self.options["preproc_type"]:
        print "Loading TF-IDF model..."
        with open(args.tfidfmodel, "rb") as f_tfidf, open(args.svdmodel, "rb") as f_svd:
            tfidf_model = pkl.load(f_tfidf)
            svd_model = pkl.load(f_svd) if "with_svd" in self.options["preproc_params"] else None
        self.text_preprocessor.set_tfidf(tfidf_model, svd_model)

    ## Word2Vec:
    if "w2v" in self.options["preproc_type"]:
        print "Loading Word2Vec model..."
        w2v_model = Word2Vec.load_word2vec_format(args.w2vmodel, binary=True)
        self.text_preprocessor.set_w2v(w2v_model)

    ## Raw:
    if "raw" in self.options["preproc_type"]:
        print "Loading counter model..."
        with open(args.rawmodel, "rb") as f_raw:
            raw_model = pkl.load(f_raw)  # load from the open file handle, not the path string
        self.text_preprocessor.set_raw(raw_model)
    if check == 9:
        return True
    else:
        return False


if __name__ == '__main__':
    import sys, os

    logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s',
                        level=logging.INFO)
    logging.info("running %s" % " ".join(sys.argv))

    # check and process cmdline input
    program = os.path.basename(sys.argv[0])
    if len(sys.argv) < 5:
        print "Usage: python preprocessor.py -infile -synset_list -vocab_filename -outputfilename "
        sys.exit(1)

    infile = sys.argv[1]
    # synset filename
    S = sys.argv[2]
    # vocab filename
    F = sys.argv[3]
    outfile = sys.argv[4]

    from preprocessor import Preprocessor  # for pickle
    #from gensim.models import Preprocessor  # for pickle
    from gensim.models.word2vec import Text8Corpus

    sentences = Text8Corpus(infile)
    prep = Preprocessor(sentences, F, S)
    prep.prep_text(8, sentences, outfile)
class ALE:
    actions = [np.uint8(0), np.uint8(1), np.uint8(3), np.uint8(4), np.uint8(11), np.uint8(12)]
    current_points = 0
    next_screen = ""
    game_over = False
    skip_frames = None
    display_screen = "true"
    game_ROM = None
    fin = ""
    fout = ""
    preprocessor = None

    def __init__(self, display_screen, skip_frames, game_ROM):
        """
        Initialize ALE class. Creates the FIFO pipes, launches ./ale and does
        the "handshake" phase of communication

        @param display_screen: bool, whether to show the game on screen or not
        @param skip_frames: int, number of frames to skip in the game emulator
        @param game_ROM: location of the game binary to launch with ./ale
        """
        self.display_screen = display_screen
        self.skip_frames = skip_frames
        self.game_ROM = game_ROM

        #: create FIFO pipes
        os.system("mkfifo ale_fifo_out")
        os.system("mkfifo ale_fifo_in")

        #: launch ALE with appropriate commands in the background
        command = ('./../libraries/ale/ale -max_num_episodes 0 -game_controller fifo_named '
                   '-disable_colour_averaging true -run_length_encoding false '
                   '-frame_skip ' + str(self.skip_frames) +
                   ' -display_screen ' + self.display_screen + " " + self.game_ROM + " &")
        os.system(command)

        #: open communication with pipes
        self.fin = open('ale_fifo_out')
        self.fout = open('ale_fifo_in', 'w')

        input = self.fin.readline()[:-1]
        size = input.split("-")  # saves the image sizes (160*210) for breakout

        #: first thing we send to ALE is the output options- we want to get only
        #  image data and episode info (hence the zeros)
        self.fout.write("1,0,0,1\n")
        self.fout.flush()  # send the lines written to pipe

        #: initialize the variables that we will start receiving from ./ale
        self.next_image = []
        self.game_over = True
        self.current_points = 0

        #: initialise preprocessor
        self.preprocessor = Preprocessor()

    def new_game(self):
        """
        Start a new game when all lives are lost.
        """
        #: read from ALE: game screen + episode info
        self.next_image, episode_info = self.fin.readline()[:-2].split(":")
        self.game_over = bool(int(episode_info.split(",")[0]))
        self.current_points = int(episode_info.split(",")[1])

        #: send the first command
        # first command has to be 1,0 or 1,1, because the game starts when you press "fire!"
        self.fout.write("1,0\n")
        self.fout.flush()
        self.fin.readline()

        #: preprocess the image and add the image to memory D using a special add function
        #self.memory.add_first(self.preprocessor.process(self.next_image))
        return self.preprocessor.process(self.next_image)

    def end_game(self):
        """
        When all lives are lost, end_game adds the last frame to memory and
        resets the system
        """
        #: tell the memory that we lost
        # self.memory.add_last()  # this will be done in Main.py

        #: send reset command to ALE
        self.fout.write("45,45\n")
        self.fout.flush()
        self.game_over = False  # just in case, but new_game should do it anyway

    def move(self, action_index):
        """
        Sends action to ALE and reads the response

        @param action_index: int, the index of the chosen action in the list of available actions
        """
        #: Convert index to action
        action = self.actions[action_index]

        #: Generate a random number for the action of player B
        action_b = random.choice(range(255))

        #: Write and send to ALE
        self.fout.write(str(action) + "," + str(action_b) + "\n")
        #print "sent action to ALE: ", str(action)+",0"
        self.fout.flush()

        #: Read from ALE
        line = self.fin.readline()
        try:
            self.next_image, episode_info = line[:-2].split(":")
            #print "got correct info from ALE: image + ", episode_info
        except:
            print "got an error in reading stuff from ALE"
            traceback.print_exc()
            print line
            exit()
        self.game_over = bool(int(episode_info.split(",")[0]))
        self.current_points = int(episode_info.split(",")[1])

        return self.current_points, self.preprocessor.process(self.next_image)
from scipy.cluster.hierarchy import linkage, fcluster
from dionysus import PairwiseDistances, ExplicitDistances
import numpy as np


def bench_cluster(X, y, pca_n_comp):
    n = len(np.unique(y))
    pca = PCA(pca_n_comp)
    X_ = pca.fit_transform(X)
    sc = SpectralClustering(n)
    km = KMeans(n)
    sc_pred = sc.fit_predict(X_)
    km_pred = km.fit_predict(X_)
    distances = PairwiseDistances(X_.tolist())
    distances = ExplicitDistances(distances)
    singlel_pred = fcluster(linkage(ssd.squareform(distances.distances)), n, criterion='maxclust')
    print "single-linkage clustering prediction:", singlel_pred
    print "single-linkage clustering score:", adjusted_rand_score(y, singlel_pred), mutual_info_score(y, singlel_pred)
    print "spectral clustering prediction:", sc_pred
    print "spectral clustering score:", adjusted_rand_score(y, sc_pred), mutual_info_score(y, sc_pred)
    print "kmeans clustering prediction", km_pred
    print "kmeans clustering score:", adjusted_rand_score(y, km_pred), mutual_info_score(y, km_pred)
    print "ground truth labels", y


if __name__ == "__main__":
    funcs = [word_lengths_funcs, sentence_lengths_funcs, ratio_most_n_common_words,
             ratio_length_of_words_texts,
             lambda text: ratio_length_of_words_texts(text, 8, ge)]
    pp = Preprocessor(Prepreprocessor, funcs, use_tfidf=20)
    X, y = pp.process(['../data/abstracts/', '../data/sports', '../data/reviews'])
    bench_cluster(X, y, 3)
def classify(self):
    t1 = time.time()

    # Schedule a crawl job with the query
    try:
        crawler = Search(self.search_query)
        crawler.googleSearch()
    except Exception as e:
        print e
        print "Error in initializing Google search"
    t2 = time.time()
    print "Google search done in " + str(t2 - t1) + " secs"

    # Extract data crawled
    try:
        crawler.get_crawled_urls()
    except Exception as e:
        print e
        print "Error in extracting crawl data"
    t3 = time.time()
    print "Test data extraction done in " + str(t3 - t2) + " secs"

    # Preprocess test data
    try:
        preproc_test = Preprocessor(crawler.all_urls)
        preproc_test.preprocessor_main()
    except Exception as e:
        print e
        print "Error in preprocessing crawl data"
    t4 = time.time()
    print "Test data preprocessing done in " + str(t4 - t3) + " secs"

    # Send a search request to Dig server with the query
    dig_search = Dig_Search(self.search_query)
    dig_search.search_request()
    t5 = time.time()
    print "Dig Search done in " + str(t5 - t4) + " secs"

    # Extract results returned by search query
    dig_search.dig_extraction()
    t6 = time.time()
    print "Dig extraction done in " + str(t6 - t5) + " secs"

    # Preprocess the search results
    try:
        preproc_train = Preprocessor(dig_search.urls_dig)
        preproc_train.preprocessor_main()
        dig_search.filter_dig_result(preproc_train.data)
    except Exception as e:
        print e
        print "Error in preprocessing training data"
    t7 = time.time()
    print "Training data preprocessing done in " + str(t7 - t6) + " secs"

    # Compute tfidf vectors of data
    try:
        tfidf_train = Tfidf_Vectorize(dig_search.urls_dig)
        tfidf_train.tfidf_vectorize_train()
        tfidf_train.tfidf_vectorize_test(preproc_test.data)
    except Exception as e:
        print e
        print "Error in computing tfidf vectorization"
    t9 = time.time()
    print "Tfidf computation done in " + str(t9 - t7) + " secs"

    # Compute similarity of training data with its centroid vector
    try:
        sim_train = Similarity(tfidf_train.tfidf_centroid_train,
                               tfidf_train.features_train, tfidf_train.tfidf_train)
        similarity_train = sim_train.similarity_main()
    except Exception as e:
        print e
        print "Error in computing cosine similarity"
    t10 = time.time()
    print "Training data similarity computation done in " + str(t10 - t9) + " secs"

    # Compute similarity of test data with training data
    try:
        sim_test = Similarity(tfidf_train.tfidf_centroid_train,
                              tfidf_train.features_train, tfidf_train.tfidf_test)
        similarity_test = sim_test.similarity_main()
    except Exception as e:
        print e
        print "Error in computing cosine similarity"
    t11 = time.time()
    print "Similarity computation done in " + str(t11 - t10) + " secs"
    print "Total time = " + str(t11 - t1)

    evaluator = Evaluation(similarity_train, similarity_test)
    urls_classified = evaluator.compare_similarity(preproc_test)
    classified_output = self.formatOutput(urls_classified)
    return classified_output
def classify(self):
    t1 = time.time()

    # Schedule a crawl job with the query
    try:
        crawler = Search(self.search_query)
        crawler.googleSearch()
    except Exception as e:
        print "Error in initializing Google search"
    t2 = time.time()
    print "Google search done in " + str(t2 - t1) + " secs"

    # Extract data crawled
    try:
        crawler.get_crawled_urls()
    except Exception as e:
        print "Error in extracting crawl data"
    t3 = time.time()
    print "Test data extraction done in " + str(t3 - t2) + " secs"

    # Preprocess test data
    try:
        preproc_test = Preprocessor(crawler.all_urls)
        preproc_test.preprocessor_main()
    except Exception as e:
        print e
        print "Error in preprocessing crawl data"
    t4 = time.time()
    print "Test data preprocessing done in " + str(t4 - t3) + " secs"

    # Send a search request to Dig server with the query
    dig_search = Dig_Search(self.search_query)
    dig_search.search_request()
    t5 = time.time()
    print "Dig Search done in " + str(t5 - t4) + " secs"

    # Extract results returned by search query
    dig_search.dig_extraction()
    t6 = time.time()
    print "Dig extraction done in " + str(t6 - t5) + " secs"

    # Preprocess the search results
    try:
        preproc_train = Preprocessor(dig_search.urls_dig)
        preproc_train.preprocessor_main()
        dig_search.filter_dig_result(preproc_train.data)
    except Exception as e:
        print e
        print "Error in preprocessing training data"
    t7 = time.time()
    print "Training data preprocessing done in " + str(t7 - t6) + " secs"

    # Compute tfidf vectors of data
    try:
        tfidf_train = Tfidf_Vectorize(dig_search.urls_dig)
        tfidf_train.tfidf_vectorize_train()
        tfidf_train.tfidf_vectorize_test(preproc_test.data)
    except Exception as e:
        print e
        print "Error in computing tfidf vectorization"
    t9 = time.time()
    print "Tfidf computation done in " + str(t9 - t7) + " secs"

    # Compute similarity of training data with its centroid vector
    try:
        sim_train = Similarity(tfidf_train.tfidf_centroid_train,
                               tfidf_train.features_train, tfidf_train.tfidf_train)
        similarity_train = sim_train.similarity_main()
    except Exception as e:
        print e
        print "Error in computing cosine similarity"
    t10 = time.time()
    print "Training data similarity computation done in " + str(t10 - t9) + " secs"

    # Compute similarity of test data with training data
    try:
        sim_test = Similarity(tfidf_train.tfidf_centroid_train,
                              tfidf_train.features_train, tfidf_train.tfidf_test)
        similarity_test = sim_test.similarity_main()
    except Exception as e:
        print e
        print "Error in computing cosine similarity"
    t11 = time.time()
    print "Similarity computation done in " + str(t11 - t10) + " secs"
    print "Total time = " + str(t11 - t1)

    evaluator = Evaluation(similarity_train, similarity_test)
    similarity_count = evaluator.compare_similarity(preproc_test)
    avg_train_similarity = numpy.mean(similarity_train)
    epsilon = 0.4 * avg_train_similarity

    classifier_output = open("output/" + self.search_query.replace(' ', '_') + "2.html", "w")
    urls_classified = []

    tfidf_tr = tfidf_train.tfidf_centroid_train
    tfidf_tr = sorted(tfidf_tr, key=lambda tfidf: tfidf[1], reverse=True)

    for sim in similarity_count:
        url_desc = {}
        url_desc['Test_url'] = ("<a href='" + preproc_test.data[sim[0]]['url'] + "'>" +
                                preproc_test.data[sim[0]]['url'] + "</a>")
        if sim[1] >= (avg_train_similarity - epsilon):
            url_desc['Classifier Output'] = True
        else:
            url_desc['Classifier Output'] = False
        url_desc['Similarity Score'] = sim[1]
        url_desc['Average Training Similarity'] = avg_train_similarity
        tfidf_url = tfidf_train.tfidf_test[sim[0]]
        tfidf_url = sorted(tfidf_url, key=lambda tfidf: tfidf[1], reverse=True)
        url_desc['Top Test Keywords'] = ", ".join([tfidf[0] for tfidf in tfidf_url[0:20]])
        urls_classified.append(url_desc)

    _json2conv = {"": urls_classified}
    classifier_output.write(
        "<html><h2 align='center' style='text-decoration:underline'>Classifier Output</h2>"
        "<h2 align='center'>Query : " + self.search_query + "</h2>"
        "<h2 align='center'>Top Train Keywords : " +
        ", ".join([tfidf[0] for tfidf in tfidf_tr[0:20]]) + "</h2><body>" +
        json2html.convert(json=_json2conv,
                          table_attributes="border=2, cellspacing=0, cellpadding=5, text-align='center'") +
        "</body></html>")
    classifier_output.close()
def analyze(snd_pipe, db_path, pp_cfg, parser_cfg, srcFiles, use_pipeline=False,
            analyzer_process=1, pp_process=1, parser_process=1):
    db = DatabaseManager()
    pp_list = [Preprocessor(**pp_cfg)
               for i in range(pp_process if use_pipeline else analyzer_process)]
    parser_list = [Parser(**parser_cfg)
                   for i in range(parser_process if use_pipeline else analyzer_process)]
    numFiles = len(srcFiles)
    use_pipeline = use_pipeline

    t_0 = datetime.datetime.now()

    projInfo = {}
    projInfo['predefined'] = pp_list[0].preprocess_predef()

    task_queue = Queue()
    done_queue = Queue()

    for i, srcFile in enumerate(srcFiles):
        task_queue.put(srcFile)
    for i in range(len(pp_list)):
        task_queue.put('STOP')

    if not use_pipeline:
        analyzer_p_list = [Process(target=analyzer_worker, args=(pp, parser, task_queue, done_queue))
                           for pp, parser in zip(pp_list, parser_list)]
        for analyzer_p in analyzer_p_list:
            analyzer_p.start()
        for i, srcFile in enumerate(srcFiles):
            #print 'analyze: [%d/%d]' % (i,numFiles), srcFile
            projInfo[srcFile] = done_queue.get()
            snd_pipe.send((i, numFiles, srcFile))
            if snd_pipe.poll():
                for analyzer_p in analyzer_p_list:
                    analyzer_p.terminate()
                for analyzer_p in analyzer_p_list:
                    analyzer_p.join()
                Preprocessor.clearTokenCache()
                snd_pipe.send('STOPPED')
                print 'analyze: canceled'
                return
        for analyzer_p in analyzer_p_list:
            analyzer_p.join()
    else:
        pp_queue = Queue()
        pp_p_list = [Process(target=preprocessor_worker, args=(pp, task_queue, pp_queue))
                     for pp in pp_list]
        for pp_p in pp_p_list:
            pp_p.start()
        parser_p_list = [Process(target=parser_worker, args=(parser, pp_queue, done_queue))
                         for parser in parser_list]
        for parser_p in parser_p_list:
            parser_p.start()
        for i, srcFile in enumerate(srcFiles):
            #print 'analyze: [%d/%d]' % (i,numFiles), srcFile
            projInfo[srcFile] = done_queue.get()
            snd_pipe.send((i, numFiles, srcFile))
            if snd_pipe.poll():
                for pp_p in pp_p_list:
                    pp_p.terminate()
                for parser_p in parser_p_list:
                    parser_p.terminate()
                for pp_p in pp_p_list:
                    pp_p.join()
                for parser_p in parser_p_list:
                    parser_p.join()
                Preprocessor.clearTokenCache()
                snd_pipe.send('STOPPED')
                print 'analyze: canceled'
                return
        for i in range(len(parser_p_list)):
            pp_queue.put('STOP')
        for pp_p in pp_p_list:
            pp_p.join()
        for parser_p in parser_p_list:
            parser_p.join()

    t_1 = datetime.datetime.now()

    db.createDB(db_path)
    db.addData(projInfo)
    db.saveDB()
    db.closeDB()

    print 'analyze: done', t_1 - t_0
    snd_pipe.send((numFiles, numFiles, 'Generating Database ... done'))
def __init__(self):
    self.readNames()
    self.count = 0
    self.column = 1
    Preprocessor.__init__(self)