Python preprocess 예제들, helpers.preprocess Python 예제들

예제 #1

0

파일 보기

def compute_frechet_inception_distance(z, y_fake, x_fake, x, y, args, di=None):
    """Estimate the Frechet Inception Distance between fake and real images.

    For ``args.max_iter`` batches, a fake batch is produced by forwarding
    ``x_fake`` and a real batch is pulled from ``di``.  Both are passed
    through ``preprocess``, written into ``x`` and pushed through the network
    ending in ``y``; the resulting activations are collected.  The score is
    the squared distance between the activation means plus the trace term
    built from the two activation covariances.

    Args:
        z: Variable whose ``.d`` receives Gaussian latent noise.
        y_fake: Variable whose ``.d`` receives random class labels.
        x_fake: Generator output variable; forwarded to create fake images.
        x: Input variable of the feature network.
        y: Output variable of the feature network.
        args: Namespace providing ``max_iter``, ``batch_size``, ``latent``,
            ``n_classes``, ``image_size`` and ``nnp_preprocess``.
        di: Data iterator whose ``next()`` returns ``(images, labels)``.
            It is dereferenced on every iteration, so it must be supplied
            even though the signature defaults it to ``None``.

    Returns:
        The FID score as a real scalar.
    """
    h_fakes = []
    h_reals = []
    for i in range(args.max_iter):
        logger.info("Compute at {}-th batch".format(i))
        # Generate
        z.d = np.random.randn(args.batch_size, args.latent)
        y_fake.d = generate_random_class(args.n_classes, args.batch_size)
        x_fake.forward(clear_buffer=True)
        # Predict for fake
        x_fake_d = x_fake.d.copy()
        x_fake_d = preprocess(
            x_fake_d, (args.image_size, args.image_size), args.nnp_preprocess)
        x.d = x_fake_d
        y.forward(clear_buffer=True)
        # NOTE(review): squeeze() presumably yields (batch, features);
        # with batch_size == 1 it would also drop the batch axis - confirm.
        h_fakes.append(y.d.copy().squeeze())
        # Predict for real
        x_d, _ = di.next()
        x_d = preprocess(
            x_d, (args.image_size, args.image_size), args.nnp_preprocess)
        x.d = x_d
        y.forward(clear_buffer=True)
        h_reals.append(y.d.copy().squeeze())
    h_fakes = np.concatenate(h_fakes)
    h_reals = np.concatenate(h_reals)

    # FID score
    ave_h_real = np.mean(h_reals, axis=0)
    ave_h_fake = np.mean(h_fakes, axis=0)
    cov_h_real = np.cov(h_reals, rowvar=False)
    cov_h_fake = np.cov(h_fakes, rowvar=False)
    cov_sqrt = sqrtm(np.dot(cov_h_real, cov_h_fake))
    # sqrtm may return a complex matrix whose imaginary parts are pure
    # numerical noise; keep the real part so the score stays a real number.
    if np.iscomplexobj(cov_sqrt):
        cov_sqrt = cov_sqrt.real
    score = np.sum((ave_h_real - ave_h_fake) ** 2) \
        + np.trace(cov_h_real + cov_h_fake - 2.0 * cov_sqrt)
    return score

예제 #2

0

파일 보기

    def __readImages(self, filename):
        """Read one image file and split it into input and target tensors.

        The file is decoded, converted to float32 and asserted to have three
        channels.  It is then split along axis 1 into
        ``self.nbTargetsToRead + self.inputNumbers`` pieces, which are assigned
        to inputs and targets according to ``self.which_direction``.

        Returns:
            A tuple ``(filename, inputs, targets, gammadInputs)`` where
            ``inputs`` and ``targets`` are lists of tensors passed through
            ``helpers.preprocess`` and ``gammadInputs`` is the input list as it
            was before ``tf.pow(..., 2.2)`` was applied.

        Raises:
            ValueError: if ``self.which_direction`` is neither ``"AtoB"`` nor
                ``"BtoA"``.
        """
        image_string = tf.read_file(
            filename)  #Gets a string tensor from a file
        decodedInput = tf.image.decode_image(
            image_string)  #Decode a string tensor as image
        floatInput = tf.image.convert_image_dtype(
            decodedInput, dtype=tf.float32)  #Transform image to float32

        # Runtime check on the last dimension (channel count) of the image.
        assertion = tf.assert_equal(tf.shape(floatInput)[-1],
                                    3,
                                    message="image does not have 3 channels")

        with tf.control_dependencies([assertion]):
            floatInput.set_shape([None, None, 3])
            inputShape = floatInput.get_shape()  # not used again in this method

            if self.mode == "eval":  #If the inputs are only the number of pictures declared
                # Append all-zero placeholders for the missing targets along
                # axis 1 so the split below still yields the expected count.
                blackTargets = tf.zeros([
                    self.inputImageSize,
                    self.inputImageSize * self.nbTargetsToRead, 3
                ])
                floatInput = tf.concat([floatInput, blackTargets], axis=1)

            floatInputSplit = tf.split(
                floatInput,
                self.nbTargetsToRead + self.inputNumbers,
                axis=1,
                name="Split_input_data"
            )  #Splitted we get a list of nbTargets + inputNumbers images

        #Sets the inputs and outputs depending on the order of images
        if self.which_direction == "AtoB":
            inputs = floatInputSplit[:self.inputNumbers]
            targets = floatInputSplit[self.inputNumbers:]

        elif self.which_direction == "BtoA":
            # Same split point as "AtoB", with the two groups swapped.
            inputs = floatInputSplit[self.inputNumbers:]
            targets = floatInputSplit[:self.inputNumbers]
        else:
            raise ValueError("Invalid direction")
        # Keep a reference to the inputs before the gamma power is applied.
        gammadInputs = inputs
        inputs = [tf.pow(input, 2.2)
                  for input in inputs]  #correct for the gamma
        #If we want to log the inputs, we do it here
        if self.logInput:
            inputs = [helpers.logTensor(input) for input in inputs]

        #The preprocess function puts the vectors value between [-1; 1] from [0;1]
        inputs = [helpers.preprocess(input) for input in inputs]
        #gammadInputs = [helpers.preprocess(gammadInput) for gammadInput in gammadInputs]
        targets = [helpers.preprocess(target) for target in targets]
        #We used to resize inputs and targets here, we have no functional need for it. Will see if there is a technical need to define the actual size.

        return filename, inputs, targets, gammadInputs

예제 #3

0

파일 보기

파일: drive.py 프로젝트: MacZel/CarND-Behavioral-Cloning-P4

def telemetry(sid, data):
    """Handle one telemetry message and answer with driving commands.

    When ``data`` is empty the handler only emits the ``'manual'`` event.
    Otherwise the camera frame is decoded, preprocessed and fed to ``model``
    to obtain a steering angle, ``controller`` supplies the throttle for the
    reported speed, both are printed and sent via ``send_control``, and the
    frame is optionally saved under ``args.image_folder``.
    """
    if not data:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
        return

    # Telemetry fields as reported by the car; steering and throttle are
    # replaced below by the values that are sent back.
    steering_angle = data["steering_angle"]
    throttle = data["throttle"]
    speed = data["speed"]
    encoded_frame = data["image"]

    # Decode the base64 center-camera frame and run the model on a batch of one.
    image = Image.open(BytesIO(base64.b64decode(encoded_frame)))
    frame = hlp.preprocess(np.asarray(image))
    single_batch = frame[None, :, :, :]
    steering_angle = float(model.predict(single_batch, batch_size=1))

    throttle = controller.update(float(speed))

    print(steering_angle, throttle)
    send_control(steering_angle, throttle)

    # Optionally keep the frame on disk, named by a millisecond UTC timestamp.
    if args.image_folder != '':
        stamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
        destination = os.path.join(args.image_folder, stamp)
        image.save('{}.jpg'.format(destination))

예제 #4

0

파일 보기

    def __readImages(self, filename):
        """Read one image file and build its network input plus blank targets.

        Returns:
            A tuple ``(filename, input, targets, gammadInput)``: the processed
            input tensor, an all-zero target tensor tiled
            ``self.nbTargetsToRead`` times along a new leading axis, and the
            float image as it was before ``tf.pow(..., 2.2)`` was applied.
        """
        # File -> string tensor -> decoded image -> float32 image.
        rawBytes = tf.read_file(filename)
        decodedImage = tf.image.decode_image(rawBytes)
        floatInput = tf.image.convert_image_dtype(decodedImage,
                                                  dtype=tf.float32)

        # Runtime check on the last dimension (channel count) of the image.
        channelCheck = tf.assert_equal(
            tf.shape(floatInput)[-1],
            3,
            message="image does not have 3 channels")
        with tf.control_dependencies([channelCheck]):
            floatInput.set_shape([None, None, 3])

        # Keep the image before the gamma power for the caller.
        gammadInput = floatInput

        # Gamma correction, optional log transform, then the range remapping
        # done by helpers.preprocess (the original note says [0;1] -> [-1;1]).
        networkInput = tf.pow(floatInput, 2.2)
        if self.logInput:
            networkInput = helpers.logTensor(networkInput)
        networkInput = helpers.preprocess(networkInput)

        # Zero targets with the input's shape, stacked nbTargetsToRead times.
        blankTarget = tf.zeros(tf.shape(networkInput))
        blankTarget = tf.expand_dims(blankTarget, axis=0)
        targets = tf.tile(blankTarget, (self.nbTargetsToRead, 1, 1, 1))

        return filename, networkInput, targets, gammadInput

예제 #5

0

파일 보기

파일: test.py 프로젝트: KelvinCODES/Deep-CNN-Vehicle-Driving

def telemetry(sid, data):
    """Handle one telemetry message and answer with driving commands.

    When ``data`` is empty the handler only emits the ``'manual'`` event.
    Otherwise the reported values are parsed, the camera frame is run through
    ``model`` to predict a steering angle, a throttle is derived from that
    angle and the speed, and both are sent via ``send_control``.  Any
    exception raised during prediction or sending is printed, not propagated.
    """
    if not data:
        sio.emit('manual', data={}, skip_sid=True)
        return

    # Parse the reported state; steering and throttle are recomputed below.
    steering_angle = float(data["steering_angle"])
    throttle = float(data["throttle"])
    speed = float(data["speed"])
    image = Image.open(BytesIO(base64.b64decode(data["image"])))

    try:
        # PIL image -> array -> preprocessed frame -> batch of one (4D).
        frame = helpers.preprocess(np.asarray(image))
        batch = np.array([frame])

        steering_angle = float(model.predict(batch, batch_size=1))
        # Throttle drops with the squared steering angle and squared speed/30.
        throttle = 1.0 - (steering_angle**2) - (speed / 30)**2
        report = 'steering angle:{}, speed:{}, throttle:{}'.format(
            steering_angle, speed, throttle)
        print(report)
        send_control(steering_angle, throttle)
    except Exception as e:
        print(e)

예제 #6

0

파일 보기

    def __renderInputs(self, materials, renderingScene, jitterLightPos, jitterViewPos, mixMaterials):
        """Crop the material maps, render them and return (targets, inputs).

        Optionally blends pairs of materials, crops a ``self.tileSize`` window
        out of the maps, renders one image per material through
        ``renderer.GGXRenderer`` and post-processes the renderings (power 2.2,
        optional log, ``helpers.preprocess``).  Also stores the squeezed,
        pre-power renderings in ``self.gammaCorrectedInputsBatch``.

        Returns:
            ``(targets, inputs)``: the de-shaped target maps and the processed
            renderings.

        Raises:
            Exception: if ``self.inputImageSize`` is smaller than
                ``self.tileSize``.
        """
        fullSizeMixedMaterial = materials
        if mixMaterials:
            # One random blend weight in [0.1, 0.9) for the whole batch.
            alpha = tf.random_uniform([1], minval=0.1, maxval=0.9, dtype=tf.float32, name="mixAlpha")

            # Pair even-indexed entries with odd-indexed entries.
            materials1 = materials[::2]
            materials2 = materials[1::2]

            fullSizeMixedMaterial = helpers.mixMaterials(materials1, materials2, alpha)

        if self.inputImageSize >=  self.tileSize :
            if self.fixCrop:
                # Centered crop: same offset on both spatial axes.
                xyCropping = (self.inputImageSize - self.tileSize) // 2
                xyCropping = [xyCropping, xyCropping]
            else:
                # Random crop: independent offsets for the two spatial axes.
                xyCropping = tf.random_uniform([2], 0, self.inputImageSize - self.tileSize, dtype=tf.int32)
            cropped_mixedMaterial = fullSizeMixedMaterial[:,:, xyCropping[0] : xyCropping[0] + self.tileSize, xyCropping[1] : xyCropping[1] + self.tileSize, :]
        elif self.inputImageSize < self.tileSize:
            raise Exception("Size of the input is inferior to the size of the rendering, please provide higher resolution maps")
        cropped_mixedMaterial.set_shape([None, self.nbTargetsToRead, self.tileSize, self.tileSize, 3])
        mixedMaterial = helpers.adaptRougness(cropped_mixedMaterial)

        targetstoRender = helpers.target_reshape(mixedMaterial) #reshape it to be compatible with the rendering algorithm [?, size, size, 12]
        nbRenderings = 1
        rendererInstance = renderer.GGXRenderer(includeDiffuse = True)
        ## Do renderings of the mixedMaterial

        targetstoRender = helpers.preprocess(targetstoRender) #Put targets to -1; 1
        surfaceArray = helpers.generateSurfaceArray(self.tileSize)

        inputs = helpers.generateInputRenderings(rendererInstance, targetstoRender, self.batchSize, nbRenderings, surfaceArray, renderingScene, jitterLightPos, jitterViewPos, self.useAmbientLight, useAugmentationInRenderings = self.useAugmentationInRenderings)

        # Keep the renderings as produced (before the power below), without
        # the singleton axis 1.
        self.gammaCorrectedInputsBatch =  tf.squeeze(inputs, [1])

        inputs = tf.pow(inputs, 2.2) # correct gamma
        if self.logInput:
            inputs = helpers.logTensor(inputs)

        inputs = helpers.preprocess(inputs) #Put inputs to -1; 1

        targets = helpers.target_deshape(targetstoRender, self.nbTargetsToRead)
        return targets, inputs

예제 #7

0

파일 보기

파일: wiki.py 프로젝트: sudodoki/prj-nlp

def add_section(section=None, path=None, content=None, parent="", output=None):
    """Record one page section in ``output["sections"]``.

    Args:
        section: A ``WikipediaPageSection``, or any other value (used as the
            raw text of a section titled ``"summary"`` at level 1).
        path: Path of the enclosing section; the new path is
            ``f"{path}/{title}"``.
        content: Full text in which the section text is located.
        parent: Parent identifier stored (after ``preprocess``) in the entry.
        output: Dict with a ``"sections"`` mapping to update in place.  When
            omitted, a fresh ``{"sections": {}}`` is created for this call.

    Returns:
        ``(output, new_path)``.

    Raises:
        ValueError: if the section text does not occur in ``content``.
    """
    # A shared ``{}`` default would leak state between calls and lacks the
    # "sections" key, so build a fresh container per call instead.
    if output is None:
        output = {"sections": {}}

    if isinstance(section, WikipediaPageSection):
        title = section.title
        text = section.text
        level = section.level
    else:
        title = "summary"
        text = section
        level = 1

    new_path = f"{path}/{title}"
    # Character span of the section inside the full content.
    start = content.index(text)
    end = start + len(text)
    # MD5 of the path serves only as a stable dictionary key here.
    hash_value = hashlib.md5(new_path.encode()).hexdigest()
    output["sections"][hash_value] = {
        "parent": preprocess(parent),
        "level": level,
        "start": start,
        "end": end,
        "title": preprocess(title),
        "path": preprocess(new_path)
    }
    return output, new_path

예제 #8

0

파일 보기

파일: processing.py 프로젝트: modelhub-ai/duc-semantic

 def _preprocessBeforeConversionToNumpy(self, image):
     """Convert a PIL image to an array and run ``preprocess`` on it.

     Stores the array in ``self._im``, its first two dimensions in
     ``self._result_shape`` and ``cv2.mean`` of the array in
     ``self._rgb_mean``.

     Raises:
         IOError: if ``image`` is not a PIL image, or if the converted array
             has two or fewer dimensions.
     """
     if not isinstance(image, PIL.Image.Image):
         raise IOError("Image Type not supported for preprocessing.")

     # PIL image -> numpy array, the representation cv2 works on.
     pixels = np.array(image)
     self._im = pixels
     if len(pixels.shape) <= 2:
         raise IOError("Image format not supported for preprocessing.")

     # Output shape equals the spatial shape of the input.
     self._result_shape = [pixels.shape[0], pixels.shape[1]]
     # Per-channel mean of the input, handed to preprocess below.
     self._rgb_mean = cv2.mean(pixels)
     return preprocess(pixels, self._rgb_mean)

예제 #9

0

파일 보기

def compute_inception_score(z, y_fake, x_fake, x, y, args):
    """Compute the Inception Score of images generated by ``x_fake``.

    For ``args.max_iter`` batches, latent noise and random classes are
    written into ``z`` and ``y_fake``, the generator is forwarded, and the
    preprocessed images are classified through ``x`` -> ``y``.  The score is
    ``exp`` of the mean KL term between the per-sample predictions and their
    average.

    Returns:
        The score as a numpy scalar.
    """
    batch_predictions = []
    for step in range(args.max_iter):
        logger.info("Compute at {}-th batch".format(step))

        # Sample generator inputs and produce a batch of fake images.
        z.d = np.random.randn(args.batch_size, args.latent)
        y_fake.d = generate_random_class(args.n_classes, args.batch_size)
        x_fake.forward(clear_buffer=True)

        # Classify the preprocessed fake images.
        generated = preprocess(
            x_fake.d.copy(),
            (args.image_size, args.image_size),
            args.nnp_preprocess)
        x.d = generated
        y.forward(clear_buffer=True)
        batch_predictions.append(y.d.copy())

    p_yx = np.concatenate(batch_predictions)

    # Mean prediction over all samples, then the per-sample KL term.
    p_y = np.mean(p_yx, axis=0)
    log_ratio = np.log(p_yx) - np.log(p_y)
    kld = np.sum(p_yx * log_ratio, axis=1)
    return np.exp(np.mean(kld))

예제 #10

0

파일 보기

def run_pipeline(iF):
    """Train and score the classifier for one recording file.

    Loads ``iF``, keeps the spike-count columns selected by the ``'MEC'``
    region mask, trains and scores the model, then writes a plot and a
    ``_scores.npy`` file to the hard-coded output folder.  Returns early
    (without output) when the dataset has no ``'anatomy'`` entry or no unit
    is selected.  Any exception is printed and swallowed.
    """
    try:
        print('Now working on '+ iF)
        dataset = preprocess(lm.loadmat(iF))
        if 'anatomy' not in dataset.keys():
            return

        # Prefer the shifted parent labels when they are available.
        anatomy = dataset['anatomy']
        if 'parent_shifted' in anatomy:
            parent_labels = anatomy['parent_shifted']
        else:
            parent_labels = anatomy['cluster_parent']

        region = 'MEC'
        in_region = np.array([region in label for label in parent_labels])
        # Restrict the mask to the units whose cgs value equals 2.
        in_region = in_region[dataset['sp']['cgs']==2]
        if in_region.sum()==0:
            return

        dataset['spikecount'] = dataset['spikecount'][:, in_region]

        model, bl_scores = eval_and_train(dataset)
        Ypred, Ytrue, speed, trial, c_matrix = score_gain_model(model, dataset)

        plt.plot(Ytrue)
        plt.plot(dataset['posx_centers'][Ypred-1])

        # File name without its last four characters (the extension).
        name = os.path.basename(iF)[:-4]
        out_prefix = 'F:\\temp\\classifier_out\\'+region +'_'+ name
        plt.savefig(out_prefix + '.png')
        plt.close()

        tmp_array = np.array([Ypred,Ytrue,speed,trial,dataset['posx_edges']])
        np.save(out_prefix + '_scores.npy', tmp_array)
    except Exception as e:
        print(str(e))
        print('not working')

예제 #11

0

파일 보기

# Load the exported bug list.
df = pd.read_csv('bugs-2019-11-25.csv')
# Build target labels from the product and component values, keeping only
# the labels that occur more than 50 times.
df['target'] = df[['Product', 'Component']].apply(' -- '.join, axis=1)
df = df.groupby('target').filter(lambda x: len(x) > 50)
# Encode the label strings as integer category codes.
df['target'] = df['target'].astype('category')
df['target_labels'] = df['target'].cat.codes

# Check that there are no missing summaries
print(f"Number of missing comments in comment text: {df['Summary'].isnull().sum()}")

# Explore categories
explore(df['target'], 40)

# Preprocess Summary dataset
print(f"Summary column before preprocessing:\n{df['Summary'].head()}")
df['Summary'] = preprocess(df['Summary'])
print(f"Summary column after preprocessing:\n{df['Summary'].head()}")

# Split dataset into train and test data.  The model input is the
# space-joined text of the four columns below.
X = df[['Summary', 'Reporter', 'Assignee', 'OS']].apply(' '.join, axis=1)
y = df['target_labels']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123, shuffle=True)
print(f"Train dataset shape: {X_train.shape, y_train.shape}")
print(f"Test dataset shape: {X_test.shape, y_test.shape}")

# Initialize TfidfVectorizer with its defaults.  Passing
# stop_words='english' would drop common English words; it is not set here.
vectorizer = TfidfVectorizer()

# Vectorize the train dataset
X_train_vectors = vectorizer.fit_transform(X_train)

예제 #12

0

파일 보기

파일: extractor.py 프로젝트: oknotok97/ir_project

def main():
    """Fetch the phone specs page, extract its spec sections and print them."""
    page = preprocess(getsoup(PHONE_SPECS_PAGE))
    # Only the elements carrying the spec-section class are parsed.
    sections = page.find_all(class_="techspecs-section")
    print(getspecs(sections))

예제 #13

0

파일 보기

    def populateInNetworkFeedGraphSpatialMix(self,
                                             renderingScene,
                                             shuffle=True,
                                             imageSize=512,
                                             useSpatialMix=True):
        """Build the input pipeline that renders network inputs in-graph.

        Creates a ``tf.data`` pipeline over ``self.pathList``, optionally
        blends pairs of materials with a thresholded Perlin-noise mask,
        renders one image per material, post-processes and crops the results,
        and stores the resulting tensors on the object.

        Args:
            renderingScene: Forwarded to ``helpers.generateInputRenderings``.
            shuffle: Whether to shuffle the dataset (buffer of 16 elements).
            imageSize: Spatial size used for the noise mask, the surface
                array and the crop computation.
            useSpatialMix: When true, twice ``self.batchSize`` elements are
                pulled per batch and mixed pairwise.

        Sets ``self.stepsPerEpoch``, ``self.inputBatch``, ``self.targetBatch``,
        ``self.iterator``, ``self.pathBatch`` and
        ``self.gammaCorrectedInputsBatch``.  Returns nothing.
        """
        with tf.name_scope("load_images"):
            #Create a tensor out of the list of paths
            filenamesTensor = tf.constant(self.pathList)
            #Reads a slice of the tensor, for example, if the tensor is of shape [100,2], the slice shape should be [2] (to check if we have problem here)
            dataset = tf.data.Dataset.from_tensor_slices(filenamesTensor)

            #for each slice apply the __readImagesGT function
            dataset = dataset.map(self.__readImagesGT,
                                  num_parallel_calls=int(
                                      multiprocessing.cpu_count() / 4))
            #Optionally shuffle, reshuffling on every pass over the data.
            #shuffle = True
            if shuffle:
                dataset = dataset.shuffle(buffer_size=16,
                                          reshuffle_each_iteration=True)
            #Authorize repetition of the dataset when one epoch is over.
            dataset = dataset.repeat()
            #set batch size (doubled when materials are mixed pairwise)
            toPull = self.batchSize
            if useSpatialMix:
                toPull = self.batchSize * 2
            batched_dataset = dataset.batch(toPull)
            batched_dataset = batched_dataset.prefetch(buffer_size=4)
            #Create an iterator to be initialized
            iterator = batched_dataset.make_initializable_iterator()

            #Create the node to retrieve next batch
            paths_batch, targets_batch = iterator.get_next()
            inputRealSize = imageSize  #Should be input image size but changed tmp

            if useSpatialMix:
                # Binary mask: noise remapped with (x + 1) * 0.5, thresholded
                # at 0.5, with a leading and a trailing singleton axis added.
                threshold = 0.5
                perlinNoise = tf.expand_dims(tf.expand_dims(
                    helpers.generate_perlin_noise_2d(
                        (inputRealSize, inputRealSize), (1, 1)),
                    axis=-1),
                                             axis=0)
                perlinNoise = (perlinNoise + 1.0) * 0.5
                perlinNoise = perlinNoise >= threshold
                perlinNoise = tf.cast(perlinNoise, tf.float32)
                inverted = 1.0 - perlinNoise

                # Even-indexed materials fill the mask, odd-indexed ones the
                # complement; the paths of the even-indexed ones are kept.
                materialsMixed1 = targets_batch[::2] * perlinNoise
                materialsMixed2 = targets_batch[1::2] * inverted

                fullSizeMixedMaterial = materialsMixed1 + materialsMixed2
                targets_batch = fullSizeMixedMaterial
                paths_batch = paths_batch[::2]

            targetstoRender = helpers.target_reshape(
                targets_batch
            )  #reshape it to be compatible with the rendering algorithm [?, size, size, 12]
            nbRenderings = 1
            rendererInstance = renderer.GGXRenderer(includeDiffuse=True)
            ## Do renderings of the mixedMaterial
            # NOTE(review): mixedMaterial is not read again in this method, so
            # the adaptRougness result does not reach the renderer - confirm
            # whether that is intended.
            mixedMaterial = helpers.adaptRougness(targetstoRender)

            targetstoRender = helpers.preprocess(
                targetstoRender)  #Put targets to -1; 1
            surfaceArray = helpers.generateSurfaceArray(inputRealSize)

            # The two False arguments sit in the light/view jitter positions
            # used by the other callers of this helper.
            inputs_batch = helpers.generateInputRenderings(
                rendererInstance,
                targetstoRender,
                self.batchSize,
                nbRenderings,
                surfaceArray,
                renderingScene,
                False,
                False,
                self.useAmbientLight,
                useAugmentationInRenderings=self.useAugmentationInRenderings)

            targets_batch = helpers.target_deshape(targetstoRender,
                                                   self.nbTargetsToRead)
            # Renderings as produced (before the power below), without the
            # singleton axis 1.
            self.gammaCorrectedInputsBatch = tf.squeeze(inputs_batch, [1])
            #tf.summary.image("GammadInputs", helpers.convert(inputs[0, :]), max_outputs=5)
            inputs_batch = tf.pow(inputs_batch, 2.2)  # correct gamma
            if self.logInput:
                inputs_batch = helpers.logTensor(inputs_batch)

            #Do the random crop; if the crop is fixed, crop in the middle
            if inputRealSize > self.tileSize:
                if self.fixCrop:
                    xyCropping = (inputRealSize - self.tileSize) // 2
                    xyCropping = [xyCropping, xyCropping]
                else:
                    # A single random offset is drawn; xyCropping[0] is used
                    # for both spatial axes below.
                    xyCropping = tf.random_uniform([1],
                                                   0,
                                                   inputRealSize -
                                                   self.tileSize,
                                                   dtype=tf.int32)

                inputs_batch = inputs_batch[:, :, xyCropping[0]:xyCropping[0] +
                                            self.tileSize,
                                            xyCropping[0]:xyCropping[0] +
                                            self.tileSize, :]
                targets_batch = targets_batch[:, :,
                                              xyCropping[0]:xyCropping[0] +
                                              self.tileSize,
                                              xyCropping[0]:xyCropping[0] +
                                              self.tileSize, :]

            #Set shapes
            inputs_batch = tf.squeeze(
                inputs_batch, [1]
            )  #Before this the input has a useless dimension in 1 as we have only 1 rendering
            inputs_batch.set_shape([None, self.tileSize, self.tileSize, 3])
            targets_batch.set_shape(
                [None, self.nbTargetsToRead, self.tileSize, self.tileSize, 3])

            #Populate the object
            # Steps are counted with batchSize, not with toPull.
            self.stepsPerEpoch = int(
                math.floor(len(self.pathList) / self.batchSize))
            self.inputBatch = inputs_batch
            self.targetBatch = targets_batch
            self.iterator = iterator
            self.pathBatch = paths_batch

예제 #14

0

파일 보기

# Load the dataset; no date parsing and no index column are requested.
df = pd.read_csv(
    r"data\forestfires.csv",
    parse_dates=[],
    index_col=[],
)

# Per-column overview: dtype and the share of distinct values per row,
# sorted by both.
print(
    pd.concat([df.dtypes, df.nunique() / len(df)],
              axis=1).rename({
                  0: "dtype",
                  1: "proportion unique"
              }, axis=1).sort_values(["dtype", "proportion unique"]))

# NOTE(review): ENCODE and CATEGORIZE are not passed to preprocess below,
# which receives literal flags instead - confirm which values are intended.
ENCODE = True
CATEGORIZE = True
X, y = preprocess(df, False, True, False)
# Plot the distribution of the target.
sns.kdeplot(y)
plt.title("KDE distribution")
plt.show()

SEED = 0
SAMPLE_SIZE = 10000

Xt, Xv, yt, yv = train_test_split(
    X, y, random_state=SEED)  # split into train and validation set
dt = lgb.Dataset(Xt, yt, free_raw_data=False)
np.random.seed(SEED)
# np.random.choice samples with replacement by default, so the sample may
# repeat training rows.
sample_idx = np.random.choice(Xt.index, size=SAMPLE_SIZE)
Xs, ys = Xt.loc[sample_idx], yt.loc[sample_idx]
ds = lgb.Dataset(Xs, ys)
dv = lgb.Dataset(Xv, yv, free_raw_data=False)

예제 #15

0

파일 보기

파일: extractor.py 프로젝트: oknotok97/ir_project

def main():
    """Fetch the page at ``PATH``, clean it, extract its specs and print them."""
    cleaned = preprocess(getsoup(PATH))
    print(getspecs(cleaned))

예제 #16

0

파일 보기

 def test_all(self):
     """End-to-end check: read, preprocess, run the algorithm, postprocess.

     The values returned by ``preprocess`` are fed into ``algorithm`` so
     that the whole chain is exercised, rather than the pre-computed
     fixture constants.
     """
     text = read_from_file('../input.txt')
     messages, customer = preprocess(text)
     suggestions = algorithm(messages, customer)
     postprocessed = postprocess(suggestions, customer)
     self.assertEqual(postprocessed, POSTPROCESSED_TEXT)

예제 #17

0

파일 보기

파일: wiki.py 프로젝트: sudodoki/prj-nlp

def get_wiki_json(title):
    """Return the parsed sections of a wiki page plus its preprocessed text.

    The page is fetched through ``wiki.page``; the section structure comes
    from ``parse_sections`` and the full text is stored under ``"text"``.
    """
    page = wiki.page(title)
    document = parse_sections(page, content=page.text)
    document["text"] = preprocess(page.text)
    return document

예제 #18

0

파일 보기

파일: train.py 프로젝트: nickcorona/divorce

import lightgbm as lgb
from helpers import preprocess
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

# Load the dataset; no date parsing and no index column are requested.
df = pd.read_csv(
    r"data\forestfires.csv",
    parse_dates=[],
    index_col=[],
)
# Build the feature frame and the target with the project's preprocess helper.
X, y = preprocess(df, encode=False, categorize=True, preran=False)
# The "rain" column is excluded from the features.
X = X.drop("rain", axis=1)
# Training dataset handed to LightGBM.
d = lgb.Dataset(X, y, silent=True)

# rmse: 98.18188205858038
NUM_BOOST_ROUND = 455
params = {
    "objective": "rmse",
    "metric": "rmse",
    "verbose": -1,
    "n_jobs": 6,
    "learning_rate": 0.004090619790710353,
    "feature_pre_filter": False,
    "lambda_l1": 6.99239231800302e-08,
    "lambda_l2": 9.330959145992983,
    "num_leaves": 9,
    "feature_fraction": 0.8999999999999999,
    "bagging_fraction": 1.0,
    "bagging_freq": 0,
    "min_child_samples": 20,

예제 #19

0

파일 보기

                 parse_dates=[],
                 index_col=[],
                 delimiter=";")

# Per-column overview: dtype and the share of distinct values per row,
# sorted by both.
print(
    pd.concat([df.dtypes, df.nunique() / len(df)],
              axis=1).rename({
                  0: "dtype",
                  1: "proportion unique"
              }, axis=1).sort_values(["dtype", "proportion unique"]))

ENCODE = False
# NOTE(review): CATEGORIZE is not passed to preprocess below, which receives
# the literal 5 in that position - confirm which value is intended.
CATEGORIZE = True
# Separate the target column from the features before preprocessing.
y = df["Divorce"]
df = df.drop("Divorce", axis=1)
X = preprocess(df, ENCODE, 5, True)
# Plot the distribution of the target.
sns.kdeplot(y)
plt.title("KDE distribution")
plt.show()

SEED = 0
SAMPLE_SIZE = 10000

Xt, Xv, yt, yv = train_test_split(
    X, y, random_state=SEED)  # split into train and validation set
dt = lgb.Dataset(Xt, yt, free_raw_data=False)
np.random.seed(SEED)
# np.random.choice samples with replacement by default, so the sample may
# repeat training rows.
sample_idx = np.random.choice(Xt.index, size=SAMPLE_SIZE)
Xs, ys = Xt.loc[sample_idx], yt.loc[sample_idx]
ds = lgb.Dataset(Xs, ys)
dv = lgb.Dataset(Xv, yv, free_raw_data=False)

예제 #20

0

파일 보기

def main():
    """Fetch the spec page, clean it and hand it to ``getspecs``."""
    cleaned_page = preprocess(getsoup(SPEC_PAGE))
    getspecs(cleaned_page)

예제 #21

0

파일 보기

def main():
    """Extract classified markups from the validation reports and print them.

    Loads the modifier and target definitions, keeps the rows of the source
    dataframe whose ``train_val`` is ``'val'``, extracts markups from every
    ``(note_name, text)`` pair on a pool of 8 worker processes, prints the
    head of the resulting dataframe and exits.

    Everything after the first ``exit()`` is unreachable work-in-progress
    code; it is kept as the original author left it.
    """
    modifiers = itemData.instantiateFromCSVtoitemData(MODIFIERS_FILE)
    targets = itemData.instantiateFromCSVtoitemData(TARGETS_FILE)

    # NOTE(review): read_pickle must only be used on trusted files.
    df = pd.read_pickle(SOURCE_DF)
    df = df[df.train_val == 'val']
    print(df.head())
    print(len(df))
    #df = df.iloc[:10]
    ref = pd.read_excel(REFERENCE_STANDARD)
    ref = update_reference_df(ref)
    reports = list(zip(df['note_name'], df['text']))

    # starmap distributes all reports over the workers and returns the
    # results in input order; calling pool.apply once per report would block
    # on each task and run them one at a time.
    pool = Pool(processes=8)
    try:
        list_of_classified_markups = pool.starmap(
            extract_markups_from_text,
            [(name_and_text, targets, modifiers)
             for name_and_text in reports])
    finally:
        pool.close()
        pool.join()
    classified_markups = pd.DataFrame(
        columns=['m', 'doc_span', 'markup_class', 'text']).append(
            list_of_classified_markups)
    print(classified_markups.head())
    exit()
    ##PICK up here
    # --- Unreachable below this point (work in progress). ---

    classified_markups = [{
        'm': m,
        'doc_span': m.docSpan,
        'markup_class': m.markup_class,
        'text': m.text
    } for m in list_of_markups]

    # TODO: Make this one long dataframe, like classified_markups
    df['markups'] = df.apply(
        lambda row: extract_markups_from_text(row.text, targets, modifiers),
        axis=1)
    print(df.head())
    classified_markups = pd.DataFrame(
        columns=['m', 'doc_span', 'markup_class', 'text'])
    for idx, row in df.iterrows():
        # Get all annotations from reference standard with this report name
        #annotations = ref[ref['File Name with extension'] == row.note_name]
        row_markups = classify_markups(row.markups, row.note_name)
        print(classified_markups)
        #if classified_markups
        classified_markups = classified_markups.append(row_markups,
                                                       ignore_index=True)
    print(len(classified_markups))
    print(classified_markups.head())
    evaluate_markups(ref, classified_markups)

    exit()
    reports = list(df[df.train_val == 'train']['text'])
    reports = [helpers.preprocess(report) for report in reports]
    split_reports = [
        helpers.my_sentence_splitter(report) for report in reports
    ]
    markups = []
    for report in split_reports[:10]:
        # Each report is a list of sentence span pairs
        for text, span in report:
            m = create_markup(s=text,
                              modifiers=modifiers,
                              targets=targets,
                              span=span)
            markups.append(m)
    print(markups)
    exit()

    markups = [
        create_markup(s=sentence,
                      modifiers=modifiers,
                      targets=targets,
                      span=span) for (sentence, span) in sentence_span_pairs
    ]

    report_names = list(set(df.note_name))
    for report in report_names:
        report_df = df[df.note_name == report]
        evaluate_report(report_df)

예제 #22

0

파일 보기

    def __renderInputs(self, materials, renderingScene, jitterLightPos,
                       jitterViewPos, mixMaterials, isTest, renderSize):
        """Render the material maps and return the cropped (targets, inputs).

        Optionally blends pairs of materials, renders ``nbRenderings`` images
        per material through ``renderer.GGXRenderer``, cuts the renderings and
        targets down with ``helpers.cutSidesOut`` and post-processes the
        renderings (power 2.2, optional log, ``helpers.preprocess``).  Also
        stores the cropped, pre-power renderings in
        ``self.gammaCorrectedInputsBatch`` and adds an image summary.

        ``isTest`` is only referenced by the commented-out augmentation code.

        Returns:
            ``(targets, inputs)``: the de-shaped target maps and the processed
            renderings.
        """
        mixedMaterial = materials
        if mixMaterials:
            # One random blend weight in [0.1, 0.9) for the whole batch.
            alpha = tf.random_uniform([1],
                                      minval=0.1,
                                      maxval=0.9,
                                      dtype=tf.float32,
                                      name="mixAlpha")
            #print("mat2: " + str(materials2))

            # Pair even-indexed entries with odd-indexed entries.
            materials1 = materials[::2]
            materials2 = materials[1::2]

            mixedMaterial = helpers.mixMaterials(materials1, materials2, alpha)
        mixedMaterial.set_shape(
            [None, self.nbTargetsToRead, renderSize, renderSize, 3])
        mixedMaterial = helpers.adaptRougness(mixedMaterial)
        #These 3 lines below tries to scale the albedos to get more variety and to randomly flatten the normals to disambiguate the normals and albedos. We did not see strong effect for these.
        #if not isTest and self.useAugmentationInRenderings:
        #    mixedMaterial = helpers.adaptAlbedos(mixedMaterial, self.batchSize)
        #    mixedMaterial = helpers.adaptNormals(mixedMaterial, self.batchSize)

        reshaped_targets_batch = helpers.target_reshape(
            mixedMaterial
        )  #reshape it to be compatible with the rendering algorithm [?, size, size, 12]
        nbRenderings = self.maxInputToRead
        if not self.fixImageNb:
            #If we don't want a constant number of input images, we randomly select a number of input images between 1 and the maximum number of images defined by the user.
            nbRenderings = tf.random_uniform([1],
                                             1,
                                             self.maxInputToRead + 1,
                                             dtype=tf.int32)[0]
        rendererInstance = renderer.GGXRenderer(includeDiffuse=True)
        ## Do renderings of the mixedMaterial

        targetstoRender = reshaped_targets_batch
        pixelsToAdd = 0

        targetstoRender = helpers.preprocess(
            targetstoRender)  #Put targets to -1; 1
        surfaceArray = helpers.generateSurfaceArray(
            renderSize, pixelsToAdd
        )  #Generate a grid Y,X between -1;1 to act as the pixel support of the rendering (computer the direction vector between each pixel and the light/view)

        #Do the renderings
        inputs = helpers.generateInputRenderings(
            rendererInstance,
            targetstoRender,
            self.batchSize,
            nbRenderings,
            surfaceArray,
            renderingScene,
            jitterLightPos,
            jitterViewPos,
            self.useAmbientLight,
            useAugmentationInRenderings=self.useAugmentationInRenderings)
        #inputs = [helpers.preprocess(input) for input in inputs]

        # Default: no jitter, i.e. a zero top-left offset for every rendering.
        randomTopLeftCrop = tf.zeros([self.batchSize, nbRenderings, 2],
                                     dtype=tf.int32)
        averageCrop = 0.0

        #If we want to jitter the renderings around (to try to take into account small non alignment), we should handle the material crop a bit differently
        #We didn't really manage to get satisfying results with the jittering of renderings. But the code could be useful if this is of interest to Ansys.
        if self.jitterRenderings:
            # Normal offsets, scaled by a log-normal factor, re-centered over
            # axis 1, rounded to integers, shifted by half the maximum jitter
            # and clipped to [0, maxJitteringPixels].
            randomTopLeftCrop = tf.random_normal(
                [self.batchSize, nbRenderings, 2], 0.0,
                1.0)  #renderSize - self.cropSize, dtype=tf.int32)
            # NOTE(review): the scale factor has shape [batchSize] while the
            # offsets are [batchSize, nbRenderings, 2]; confirm that the
            # broadcast along the last axis is what is wanted.
            randomTopLeftCrop = randomTopLeftCrop * tf.exp(
                tf.random_normal(
                    [self.batchSize], 0.0,
                    1.0))  #renderSize - self.cropSize, dtype=tf.int32)
            randomTopLeftCrop = randomTopLeftCrop - tf.reduce_mean(
                randomTopLeftCrop, axis=1, keep_dims=True)
            randomTopLeftCrop = tf.round(randomTopLeftCrop)
            randomTopLeftCrop = tf.cast(randomTopLeftCrop, dtype=tf.int32)
            averageCrop = tf.cast(self.maxJitteringPixels * 0.5,
                                  dtype=tf.int32)
            randomTopLeftCrop = randomTopLeftCrop + averageCrop
            randomTopLeftCrop = tf.clip_by_value(randomTopLeftCrop, 0,
                                                 self.maxJitteringPixels)

        totalCropSize = self.cropSize

        inputs, targets = helpers.cutSidesOut(inputs, targetstoRender,
                                              randomTopLeftCrop, totalCropSize,
                                              self.firstAsGuide, averageCrop)
        print("inputs shape after" + str(inputs.get_shape()))

        # Keep the cropped renderings as they are before the power below.
        self.gammaCorrectedInputsBatch = inputs
        tf.summary.image("GammadInputs",
                         helpers.convert(inputs[0, :]),
                         max_outputs=5)
        inputs = tf.pow(inputs, 2.2)  # correct gamma
        if self.logInput:
            inputs = helpers.logTensor(inputs)

        inputs = helpers.preprocess(inputs)
        targets = helpers.target_deshape(targets, self.nbTargetsToRead)
        return targets, inputs

예제 #23

0

파일 보기

 def test_preprocessing(self):
     """``preprocess`` must yield the expected messages and customer."""
     actual_messages, actual_customer = preprocess(INPUT_TEXT)
     self.assertEqual(actual_messages, PREPROCESSED_MESSAGES)
     self.assertEqual(actual_customer, CUSTOMER)

예제 #24

0

파일 보기

def app():
    """Render the filterable delinquency map page.

    Loads the delinquency table with ``preprocess()`` and the census table
    with ``preprocess_acs()``, lets the user pick a delinquency metric and a
    census metric plus a threshold slider, and draws a choropleth restricted
    to the zip codes that pass the census filter.
    """
    _max_width_()
    main_df = preprocess()
    acs_df = preprocess_acs()

    st.title(
        'Filterable Philly Zip Code Map of Residential Tax Delinquencies and Census Metrics'
    )
    st.write(
        'Interactive breakdown of total accounts and principal due for actionable delinquent residential accounts in Philly, which can be filtered to show areas by levels of income, poverty, and unemployment from the census.'
    )
    st.write(
        'Use the dropdown menu to select a given delinquency metric to be displayed on the map, then a census metric to filter the map. Use the resulting slider to select the census metric threshold determining which zip codes to display. Hover over an area to view the corresponding metric value and zip code number.  While far from perfectly correlated, zip codes with larger total number of delinquent accounts and total principal due tend to have higher poverty levels and unemployment rates and lower median incomes.'
    )

    c1, c2 = st.beta_columns(2)
    metric = c1.selectbox('Select Delinquency Metric',
                          ('Total Accounts', 'Total Principal Due'))
    demo = c1.selectbox('Select Census Metric', [
        'Households Median Income', 'Percent Below Poverty',
        'Unemployment Rate'
    ])

    # The census table is indexed by the menu label with spaces turned into
    # underscores.
    demo_filter = demo.replace(' ', '_')
    if demo == 'Households Median Income':
        # Income keeps the zip codes *below* the chosen threshold.
        demo_slider = c1.slider('Households Median Income Below', 20000,
                                110000, 110000, 1000)
        acs_df = acs_df[acs_df[demo_filter] < demo_slider]
    else:
        # Poverty and unemployment keep the zip codes *above* the threshold.
        if demo == 'Percent Below Poverty':
            demo_slider = c1.slider('Percent Below Poverty Above', 5, 50, 5, 1)
        else:
            demo_slider = c1.slider('Unemployment Rate Above', 2, 20, 2, 1)
        acs_df = acs_df[acs_df[demo_filter] > demo_slider]
    zips_filter = acs_df['Zip_Code'].tolist()

    philly = (40.00, -75.16)
    zips_geo = 'Zipcodes_Poly.geojson'
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(zips_geo, encoding='utf-8') as f:
        zips_data = json.load(f)

    by_zip = filter_zip(main_df, metric)
    by_zip = by_zip[by_zip.index.isin(zips_filter)]
    z = by_zip.values.tolist()
    # Locations are matched against the GeoJSON "properties.CODE" field as
    # strings.
    locations = [str(int(x)) for x in by_zip.index.tolist()]

    map_fig = go.FigureWidget(
        go.Choroplethmapbox(geojson=zips_data,
                            z=z,
                            locations=locations,
                            featureidkey="properties.CODE",
                            colorscale='YlOrRd'))
    map_fig.update_layout(mapbox_style="carto-positron",
                          mapbox_zoom=9,
                          mapbox_center={
                              "lat": philly[0],
                              "lon": philly[1]
                          })
    map_fig.update_layout(margin={
        "r": 0,
        "t": 0,
        "l": 0,
        "b": 0
    },
                          height=600,
                          width=540)

    c2.plotly_chart(map_fig)

예제 #25

0

파일 보기

파일: tx.py 프로젝트: GVictorsd/Error-correctionCodes

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import helpers


# Check that the input exists. The raw capture is what gets preprocessed;
# file.csv is only produced further down, so it must not be the file tested
# here.
if not os.path.exists('./raw.csv'):
    print('error: No raw.csv file found!!')
    # SystemExit works even where the site-provided exit() helper is absent.
    raise SystemExit(-1)

# A bit of preprocessing of the input file (simple reformatting).
# NOTE(review): assumes helpers.preprocess(src, dst, remove_list) reads
# 'raw.csv' and writes 'file.csv' -- confirm against helpers.
removeList = [0, 1, 2, 3, 4, 37, 38, 39, 40, 41]
helpers.preprocess('raw.csv', 'file.csv', removeList)

# import data from file.csv as pandas dataframe
data = pd.read_csv('./file.csv', names=[0, 1, 2])


clk = list(data[data.columns[2]])    # clock data (third column) as a list
data = list(data[data.columns[0]])   # transmitted data (first column)


# sample indices over which the data is defined
time = list(range(len(data)))

samplingFreq = 50   # sampling frequency for modulated analog output

예제 #26

0

파일 보기

def match(args):
    """Generate one image for ``args.class_id`` and save its closest real images.

    One fake image is generated and passed through the network loaded from
    the NNP file to obtain a feature. Every image yielded by the data
    iterator is passed through the same network, and the squared L2 distance
    between its feature and the fake feature is recorded. The ``args.top_n``
    real images with the smallest distance are written to the image monitor,
    and the fake image followed by those matches is written as one tile.

    Args:
        args: parsed command-line options. This function reads device_id,
            type_config, latent, maps, image_size, n_classes, not_sn,
            truncation_threshold, model_load_path,
            nnp_inception_model_load_path, variable_name, train_dir,
            dirname_to_label_path, class_id, monitor_path, top_n and
            nnp_preprocess.
    """
    # Context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module, device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Args
    latent = args.latent
    maps = args.maps
    batch_size = 1  # one image per forward pass; the ranking below relies on it
    image_size = args.image_size
    n_classes = args.n_classes
    not_sn = args.not_sn
    threshold = args.truncation_threshold

    # Model (SAGAN)
    nn.load_parameters(args.model_load_path)
    z = nn.Variable([batch_size, latent])
    y_fake = nn.Variable([batch_size])
    x_fake = generator(z, y_fake, maps=maps, n_classes=n_classes, test=True, sn=not_sn)\
        .apply(persistent=True)

    # Model (Inception model) from nnp file
    nnp = NnpLoader(args.nnp_inception_model_load_path)
    x, h = get_input_and_output(nnp, batch_size, args.variable_name)

    # DataIterator for a given class_id
    di = data_iterator_imagenet(args.train_dir, args.dirname_to_label_path,
                                batch_size=batch_size, n_classes=n_classes,
                                noise=False,
                                class_id=args.class_id)

    # Monitor
    monitor = Monitor(args.monitor_path)
    name = "Matched Image {}".format(args.class_id)
    monitor_image = MonitorImage(name, monitor, interval=1,
                                 num_images=batch_size,
                                 normalize_method=lambda x: (x + 1.) / 2. * 255.)
    name = "Matched Image Tile {}".format(args.class_id)
    monitor_image_tile = MonitorImageTile(name, monitor, interval=1,
                                          num_images=batch_size + args.top_n,
                                          normalize_method=lambda x: (x + 1.) / 2. * 255.)

    # Generate and p(h|x).forward
    # generate
    z_data = resample(batch_size, latent, threshold)
    y_data = generate_one_class(args.class_id, batch_size)
    z.d = z_data
    y_fake.d = y_data
    x_fake.forward(clear_buffer=True)
    # p(h|x).forward
    x_fake_image = x_fake.d.copy()  # kept un-preprocessed for the output tile
    x.d = preprocess(
        x_fake_image.copy(), (image_size, image_size), args.nnp_preprocess)
    h.forward(clear_buffer=True)
    h_fake_d = h.d.copy()

    # Feature matching
    # Reduce over every non-batch axis so each sample gets a single distance.
    axis = tuple(range(1, len(h.shape)))
    norm2_list = []
    x_real_list = []
    for _ in range(di.size):
        # forward for real data
        x_d = di.next()[0]
        x_real_list.append(x_d)  # keep the raw image for saving
        x.d = preprocess(
            x_d, (image_size, image_size), args.nnp_preprocess)
        h.forward(clear_buffer=True)
        h_real_d = h.d.copy()
        # norm computation
        norm2_list.append(np.sum((h_real_d - h_fake_d) ** 2.0, axis=axis))

    # Save top-n images
    # Flatten to one distance per real image before ranking: argsort on the
    # stacked (N, 1) list would sort along the length-1 axis and return zeros.
    norms = np.concatenate(norm2_list) if norm2_list else np.zeros(0)
    argmins = np.argsort(norms)[:args.top_n]
    top_images = [x_real_list[j] for j in argmins]
    for rank, image in enumerate(top_images):
        monitor_image.add(rank, image)
    # Tile layout: the generated image first, then its matches by distance.
    matched_images = np.concatenate([x_fake_image] + top_images)
    monitor_image_tile.add(0, matched_images)

예제 #27

0

파일 보기

파일: run.py 프로젝트: guillaumemichel/Project1-ML

# Keep prompting until the user enters an integer between 0 and 7 inclusive;
# anything that is not an integer counts as an invalid choice.
model = -1
while not 0 <= model <= 7:
    try:
        model = int(input("Enter a valid number: "))
    except ValueError:
        model = -1

# Load the train and test data
print("Loading the data...")

y_tr, input_data_train, _ = load_csv_data("data/train.csv")
y_te, input_data_test, ids_test = load_csv_data("data/test.csv")

# Preprocess train and test data
print("Preprocessing the data...")

tx_tr, tx_te = map(preprocess, (input_data_train, input_data_test))

# Compute the optimal weights, starting from an all-zero weight vector
print("Computing the optimal weights...")

initial_weights = np.zeros(tx_tr.shape[1])
losses, optimal_weights = choose_model(
    y_tr, tx_tr, models[model], initial_weights, 500, 2e-6, 0.0008)

print("Test accuracy: ", compute_accuracy(y_te, tx_te, optimal_weights))
print("Training accuracy: ", compute_accuracy(y_tr, tx_tr, optimal_weights))

# Write the predictions for the test set to the submission file
y_pred = predict_labels(optimal_weights, tx_te)
create_csv_submission(ids_test, y_pred, "submission.csv")

예제 #28

0

파일 보기

def app():
    """Render the side-by-side delinquency / census map page.

    The left column shows a choropleth of the selected delinquency metric
    per zip code (from ``preprocess()`` and ``filter_zip``); the right column
    shows a choropleth of the selected census metric (from
    ``preprocess_acs()``). Both maps share the same GeoJSON, centre and
    layout.
    """
    _max_width_()

    st.title(
        'Side-by-Side Philly Zip Code Map of Residential Tax Delinquencies and Census Metrics'
    )
    st.write(
        'Interactive breakdown of total accounts and principal due for actionable delinquent residential accounts in Philly, side-by-side with income, poverty, and unemployment data from the census.'
    )
    st.write(
        'Use the dropdown menus to select a given delinquency and census metric associated with the corresponding map. Hover over an area to view the corresponding metric value and zip code number. While far from perfectly correlated, zip codes with larger total number of delinquent accounts and total principal due tend to have higher poverty levels and unemployment rates and lower median incomes.'
    )

    main_df = preprocess()

    c1, c2 = st.beta_columns(2)

    metric = c1.selectbox('Select Delinquency Metric',
                          ('Total Accounts', 'Total Principal Due'))

    philly = (40.00, -75.16)
    zips_geo = 'Zipcodes_Poly.geojson'
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(zips_geo, encoding='utf-8') as f:
        zips_data = json.load(f)

    def zip_choropleth(z, locations):
        """Return a zip-code choropleth of ``z`` at ``locations`` with the page's shared layout."""
        fig = go.FigureWidget(
            go.Choroplethmapbox(geojson=zips_data,
                                z=z,
                                locations=locations,
                                featureidkey="properties.CODE",
                                colorscale='YlOrRd'))
        fig.update_layout(mapbox_style="carto-positron",
                          mapbox_zoom=9,
                          mapbox_center={
                              "lat": philly[0],
                              "lon": philly[1]
                          },
                          margin={
                              "r": 0,
                              "t": 0,
                              "l": 0,
                              "b": 0
                          },
                          height=600,
                          width=540)
        return fig

    # Left map: delinquency metric per zip code.
    by_zip = filter_zip(main_df, metric)
    z = by_zip.values.tolist()
    # Locations are matched against the GeoJSON "properties.CODE" field as
    # strings.
    locations = [str(int(x)) for x in by_zip.index.tolist()]
    map_fig = zip_choropleth(z, locations)

    c1.plotly_chart(map_fig)

    # Right map: census metric per zip code.
    acs_df = preprocess_acs()
    acs_metric = c2.selectbox('Select Census Metric',
                              ('Households Median Income',
                               'Percent Below Poverty', 'Unemployment Rate'))

    # The census table is indexed by the menu label with spaces turned into
    # underscores.
    acs_map_fig = zip_choropleth(acs_df[acs_metric.replace(' ', '_')],
                                 acs_df['Zip_Code'])

    c2.plotly_chart(acs_map_fig)

예제 #29

0

파일 보기

def main():
    """Fetch SPEC_PAGE, preprocess it, and print the specs extracted from it."""
    page = preprocess(getsoup(SPEC_PAGE))
    # Every element carrying both spec-wrapper classes is one spec section.
    spec_nodes = page.find_all(class_="tech_spec_wrap spec_toggle")
    print(getspecs(spec_nodes))

예제 #30

0

파일 보기

파일: app.py 프로젝트: SlimHintz/bait-n-switch

def predict():
    """Serve the input form (GET) or classify the submitted text (POST).

    On POST the "headline" form field is inspected:

    * more than one URL in it renders the apology page;
    * exactly one URL is fetched, its headlines are classified, and the
      summary page is rendered (or the apology page if anything fails);
    * otherwise the text is treated as a single headline, which must be at
      least four words long, and is cleaned, vectorised with the prefit
      tf-idf vectorizer and classified.

    Returns:
        The rendered template for the matching case. For request methods
        other than GET and POST nothing is returned.
    """
    if request.method == "GET":
        return render_template("input.html")

    if request.method == "POST":
        # A missing form field is treated as empty text so the request ends
        # on the "too short" page instead of failing on None.
        # NOTE(review): assumes find_url("") returns no URLs -- confirm.
        headline = request.form.get("headline") or ""

        # Check whether the user entered a url (per the original author's
        # note, a url is defined as starting with http).
        urls = find_url(headline)

        if len(urls) > 1:
            message = "please only submit 1 url at a time"
            return render_template("apology.html", message=message)
        if urls:
            # Known issues noted by the original author: a url with no h1-h4
            # tags used to raise (fixed); some urls return nothing at all.
            try:
                # retrieve the urls using the function created in helpers.py
                prediction_dfs = [
                    predict_on_html(get_html_series(url), model, tfidf)
                    for url in urls
                ]
                clickbait_proportion = np.mean(
                    [df.target.mean() for df in prediction_dfs])
                # At most one url gets this far, so the first frame is the
                # only one.
                df = prediction_dfs[0]
                total_headlines = len(df)
                num_bait = len(df[df.target == 1])
                num_norm = len(df[df.target == 0])

                str_percentage = str(round((clickbait_proportion * 100), 0))
            except Exception:
                # Deliberate best effort: any fetch/parse/predict failure is
                # shown as an apology page. Unlike a bare except, this lets
                # SystemExit and KeyboardInterrupt propagate.
                message = "Bait 'n' Switch was unable to parse the website you provided"
                return render_template("apology.html", message=message)

            return render_template("url_prediction.html",
                                   proportion=(clickbait_proportion),
                                   percentage=str_percentage,
                                   total_headlines=total_headlines,
                                   num_bait=num_bait,
                                   num_norm=num_norm)

        # Check if the headline is at least 4 words long
        headline_length = len(headline.split())

        if headline_length <= 3:
            return render_template("too_short.html",
                                   headline_length=str(headline_length))

        # Clean headline
        headline = preprocess(headline, length=0)

        # Convert the headline to a series
        headline_series = pd.Series(data=(headline), index=[0])

        # Use the prefit tfidf vectorizer to transform the headline
        headline_tfidf = tfidf.transform(headline_series)

        # Predict on the tfidf headline
        prediction = model.predict(headline_tfidf)

        # Send headline prediction to display
        return render_template("success.html",
                               headline=headline_series[0],
                               prediction=prediction)