def compute_frechet_inception_distance(z, y_fake, x_fake, x, y, args, di=None):
    """Estimate the Frechet Inception Distance between fake and real images.

    For ``args.max_iter`` batches, random latent codes and class labels are
    written into ``z`` / ``y_fake``, the generator output ``x_fake`` is
    computed, and both the generated batch and a real batch drawn from ``di``
    are preprocessed and pushed through the feature network (``x`` -> ``y``).
    The FID is then computed from the mean and covariance of both feature sets.

    Args:
        z: Variable receiving the latent noise (written through ``.d``).
        y_fake: Variable receiving the sampled class labels.
        x_fake: Generator output variable; ``forward`` is called on it.
        x: Input variable of the feature network.
        y: Output variable of the feature network.
        args: Namespace providing ``max_iter``, ``batch_size``, ``latent``,
            ``n_classes``, ``image_size`` and ``nnp_preprocess``.
        di: Iterator whose ``next()`` returns ``(images, labels)`` for real
            data. Required in practice even though it defaults to ``None``.

    Returns:
        The FID score as a real-valued scalar.

    Raises:
        ValueError: If ``di`` is not provided.
    """
    if di is None:
        # Fail up front instead of after a generator forward pass.
        raise ValueError("di (real-data iterator) is required to compute FID")

    target_size = (args.image_size, args.image_size)
    h_fakes = []
    h_reals = []
    for i in range(args.max_iter):
        logger.info("Compute at {}-th batch".format(i))

        # Generate
        z.d = np.random.randn(args.batch_size, args.latent)
        y_fake.d = generate_random_class(args.n_classes, args.batch_size)
        x_fake.forward(clear_buffer=True)

        # Predict for fake
        x_fake_d = preprocess(x_fake.d.copy(), target_size, args.nnp_preprocess)
        x.d = x_fake_d
        y.forward(clear_buffer=True)
        h_fakes.append(y.d.copy().squeeze())

        # Predict for real
        x_d, _ = di.next()
        x_d = preprocess(x_d, target_size, args.nnp_preprocess)
        x.d = x_d
        y.forward(clear_buffer=True)
        h_reals.append(y.d.copy().squeeze())

    h_fakes = np.concatenate(h_fakes)
    h_reals = np.concatenate(h_reals)

    # FID score
    ave_h_real = np.mean(h_reals, axis=0)
    ave_h_fake = np.mean(h_fakes, axis=0)
    cov_h_real = np.cov(h_reals, rowvar=False)
    cov_h_fake = np.cov(h_fakes, rowvar=False)

    # sqrtm may return a complex matrix with a tiny imaginary part caused by
    # numerical error; keep only the real part so the score stays real.
    cov_sqrt = sqrtm(np.dot(cov_h_real, cov_h_fake))
    if np.iscomplexobj(cov_sqrt):
        cov_sqrt = cov_sqrt.real

    score = np.sum((ave_h_real - ave_h_fake) ** 2) \
        + np.trace(cov_h_real + cov_h_fake - 2.0 * cov_sqrt)
    return score
def __readImages(self, filename):
    """Load one image strip and split it into network inputs and targets.

    The file is decoded to float32, checked to have 3 channels, and split
    along the width (axis 1) into ``nbTargetsToRead + inputNumbers`` images.
    In ``"eval"`` mode the file is expected to hold only the inputs, so black
    placeholder targets are appended before splitting.

    Args:
        filename: String tensor holding the path of the image to read.

    Returns:
        Tuple ``(filename, inputs, targets, gammadInputs)`` where ``inputs``
        are the gamma-linearised (optionally logged) and preprocessed input
        images, ``targets`` the preprocessed target images, and
        ``gammadInputs`` the input images as split from the file.

    Raises:
        ValueError: If ``self.which_direction`` is neither "AtoB" nor "BtoA".
    """
    image_string = tf.read_file(filename)  # Gets a string tensor from a file
    decodedInput = tf.image.decode_image(image_string)  # Decode the string tensor as image
    floatInput = tf.image.convert_image_dtype(decodedInput, dtype=tf.float32)

    assertion = tf.assert_equal(tf.shape(floatInput)[-1], 3,
                                message="image does not have 3 channels")
    with tf.control_dependencies([assertion]):
        # set_shape creates no op, so on its own it cannot carry the control
        # dependency; route the tensor through identity so every downstream
        # op actually depends on the channel check.
        floatInput = tf.identity(floatInput)
    floatInput.set_shape([None, None, 3])

    if self.mode == "eval":
        # The file only holds the input pictures: pad with black targets so
        # the split below still yields nbTargetsToRead target images.
        blackTargets = tf.zeros([
            self.inputImageSize,
            self.inputImageSize * self.nbTargetsToRead,
            3,
        ])
        floatInput = tf.concat([floatInput, blackTargets], axis=1)

    # After the split we get a list of nbTargets + inputNumbers images.
    floatInputSplit = tf.split(floatInput,
                               self.nbTargetsToRead + self.inputNumbers,
                               axis=1,
                               name="Split_input_data")

    # Sets the inputs and outputs depending on the order of images.
    if self.which_direction == "AtoB":
        inputs = floatInputSplit[:self.inputNumbers]
        targets = floatInputSplit[self.inputNumbers:]
    elif self.which_direction == "BtoA":
        inputs = floatInputSplit[self.inputNumbers:]
        targets = floatInputSplit[:self.inputNumbers]
    else:
        raise ValueError("Invalid direction")

    gammadInputs = inputs
    inputs = [tf.pow(image, 2.2) for image in inputs]  # correct for the gamma

    # If we want to log the inputs, we do it here.
    if self.logInput:
        inputs = [helpers.logTensor(image) for image in inputs]

    # helpers.preprocess maps values from [0; 1] to [-1; 1].
    inputs = [helpers.preprocess(image) for image in inputs]
    targets = [helpers.preprocess(target) for target in targets]

    return filename, inputs, targets, gammadInputs
def telemetry(sid, data):
    """Handle one telemetry event: predict steering, send controls, save frame.

    When ``data`` is empty the simulator is switched to manual mode instead.
    """
    if not data:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
        return

    # Values reported by the simulator for the current frame.
    reported_angle = data["steering_angle"]
    reported_throttle = data["throttle"]
    speed = data["speed"]
    encoded_frame = data["image"]

    # Decode the centre-camera frame and run it through the model.
    image = Image.open(BytesIO(base64.b64decode(encoded_frame)))
    image_array = hlp.preprocess(np.asarray(image))
    batch = image_array[None, :, :, :]
    steering_angle = float(model.predict(batch, batch_size=1))

    throttle = controller.update(float(speed))

    print(steering_angle, throttle)
    send_control(steering_angle, throttle)

    # Save the frame when an output folder was requested.
    if args.image_folder != '':
        timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
        image_filename = os.path.join(args.image_folder, timestamp)
        image.save('{}.jpg'.format(image_filename))
def __readImages(self, filename):
    """Load a single input image and build all-zero placeholder targets.

    Args:
        filename: String tensor holding the path of the image to read.

    Returns:
        Tuple ``(filename, input, targets, gammadInput)``: the path, the
        gamma-linearised (optionally logged) and preprocessed image, a zero
        tensor with the image's shape tiled ``nbTargetsToRead`` times along a
        new leading axis, and the decoded float image before gamma correction.
    """
    image_string = tf.read_file(filename)  # Gets a string tensor from a file
    decodedInput = tf.image.decode_image(image_string)  # Decode the string tensor as image
    floatInput = tf.image.convert_image_dtype(decodedInput, dtype=tf.float32)

    assertion = tf.assert_equal(tf.shape(floatInput)[-1], 3,
                                message="image does not have 3 channels")
    with tf.control_dependencies([assertion]):
        # set_shape creates no op, so on its own it cannot carry the control
        # dependency; route the tensor through identity so every downstream
        # op actually depends on the channel check.
        floatInput = tf.identity(floatInput)
    floatInput.set_shape([None, None, 3])

    gammadInput = floatInput
    linearInput = tf.pow(floatInput, 2.2)  # correct for the gamma

    # If we want to log the inputs, we do it here.
    if self.logInput:
        linearInput = helpers.logTensor(linearInput)

    # helpers.preprocess maps values from [0; 1] to [-1; 1].
    linearInput = helpers.preprocess(linearInput)

    # No ground truth is available here: emit zero targets of matching size.
    targets = tf.zeros(tf.shape(linearInput))  # static shape is (None, None, 3)
    targets = tf.expand_dims(targets, axis=0)
    targets = tf.tile(targets, (self.nbTargetsToRead, 1, 1, 1))

    return filename, linearInput, targets, gammadInput
def telemetry(sid, data):
    """Handle one telemetry event and answer with steering and throttle.

    With no ``data`` the simulator is told to stay in manual mode. Errors
    raised while predicting or sending the controls are printed, not raised.
    """
    if not data:
        sio.emit('manual', data={}, skip_sid=True)
        return

    # Current state of the car as reported by the simulator.
    steering_angle = float(data["steering_angle"])
    throttle = float(data["throttle"])
    speed = float(data["speed"])
    frame = Image.open(BytesIO(base64.b64decode(data["image"])))

    try:
        # PIL image -> array -> preprocessed -> batch of one (4D) for the model.
        pixels = helpers.preprocess(np.asarray(frame))
        batch = np.array([pixels])
        steering_angle = float(model.predict(batch, batch_size=1))

        # Slow down for sharp turns and as the speed approaches 30.
        throttle = float(1) - (steering_angle**2) - (speed / 30)**2

        print('steering angle:{}, speed:{}, throttle:{}'.format(
            steering_angle, speed, throttle))
        send_control(steering_angle, throttle)
    except Exception as e:
        print(e)
def __renderInputs(self, materials, renderingScene, jitterLightPos, jitterViewPos, mixMaterials):
    """Crop the material maps to a tile and render one input image from them.

    Optionally blends consecutive material pairs first. Also stores the
    rendering before gamma correction in ``self.gammaCorrectedInputsBatch``.

    Returns:
        Tuple ``(targets, inputs)`` of preprocessed target maps and
        preprocessed renderings.

    Raises:
        Exception: If ``self.inputImageSize`` is smaller than ``self.tileSize``.
    """
    material = materials
    if mixMaterials:
        # Blend even-indexed with odd-indexed materials using a random weight.
        alpha = tf.random_uniform([1], minval=0.1, maxval=0.9, dtype=tf.float32, name="mixAlpha")
        evenMaterials = materials[::2]
        oddMaterials = materials[1::2]
        material = helpers.mixMaterials(evenMaterials, oddMaterials, alpha)

    if self.inputImageSize < self.tileSize:
        raise Exception("Size of the input is inferior to the size of the rendering, please provide higher resolution maps")

    # Centre crop when fixCrop is set, random crop otherwise.
    margin = self.inputImageSize - self.tileSize
    if self.fixCrop:
        offsets = [margin // 2, margin // 2]
    else:
        offsets = tf.random_uniform([2], 0, margin, dtype=tf.int32)
    top = offsets[0]
    left = offsets[1]
    tile = material[:, :, top:top + self.tileSize, left:left + self.tileSize, :]
    tile.set_shape([None, self.nbTargetsToRead, self.tileSize, self.tileSize, 3])

    tile = helpers.adaptRougness(tile)
    # Reshape to be compatible with the rendering algorithm [?, size, size, 12].
    targetstoRender = helpers.target_reshape(tile)

    renderingCount = 1
    ggx = renderer.GGXRenderer(includeDiffuse=True)

    # Render the material.
    targetstoRender = helpers.preprocess(targetstoRender)  # Put targets to -1; 1
    surface = helpers.generateSurfaceArray(self.tileSize)
    renderings = helpers.generateInputRenderings(
        ggx, targetstoRender, self.batchSize, renderingCount, surface,
        renderingScene, jitterLightPos, jitterViewPos, self.useAmbientLight,
        useAugmentationInRenderings=self.useAugmentationInRenderings)

    self.gammaCorrectedInputsBatch = tf.squeeze(renderings, [1])

    renderings = tf.pow(renderings, 2.2)  # correct gamma
    if self.logInput:
        renderings = helpers.logTensor(renderings)
    renderings = helpers.preprocess(renderings)  # Put inputs to -1; 1

    targets = helpers.target_deshape(targetstoRender, self.nbTargetsToRead)
    return targets, renderings
def add_section(section=None, path=None, content=None, parent="", output=None):
    """Record the location and metadata of one section in ``output``.

    Args:
        section: A ``WikipediaPageSection`` (its title, text and level are
            used) or any other value, which is treated as the text of a
            level-1 section titled "summary".
        path: Path of the parent; the section path is ``f"{path}/{title}"``.
        content: Full text in which the section text is located.
        parent: Parent title, stored after ``preprocess``.
        output: Dict collecting results under its ``"sections"`` key. A new
            dict is created when omitted, and the ``"sections"`` entry is
            created when missing.

    Returns:
        Tuple ``(output, new_path)``.

    Raises:
        ValueError: If the section text does not occur in ``content``.
    """
    # A fresh dict per call: a mutable default would be shared across calls.
    if output is None:
        output = {}

    if isinstance(section, WikipediaPageSection):
        title = section.title
        text = section.text
        level = section.level
    else:
        title = "summary"
        text = section
        level = 1

    new_path = f"{path}/{title}"
    start = content.index(text)
    end = start + len(text)

    # Sections are keyed by the MD5 of their path (an identifier, not security).
    hash_value = hashlib.md5(new_path.encode()).hexdigest()
    output.setdefault("sections", {})[hash_value] = {
        "parent": preprocess(parent),
        "level": level,
        "start": start,
        "end": end,
        "title": preprocess(title),
        "path": preprocess(new_path),
    }
    return output, new_path
def _preprocessBeforeConversionToNumpy(self, image):
    """Convert a PIL image to an array and run ``preprocess`` on it.

    Stores the array, its height/width and its per-channel mean on the
    instance (``_im``, ``_result_shape``, ``_rgb_mean``).

    Raises:
        IOError: If ``image`` is not a PIL image or has two or fewer dimensions.
    """
    if not isinstance(image, PIL.Image.Image):
        raise IOError("Image Type not supported for preprocessing.")

    # switches PIL to cv2
    self._im = np.array(image)
    dims = self._im.shape
    if len(dims) <= 2:
        raise IOError("Image format not supported for preprocessing.")

    # Output shape is the same as the input height and width.
    self._result_shape = [dims[0], dims[1]]
    # Channel means of the input image, used for mean subtraction.
    self._rgb_mean = cv2.mean(self._im)

    return preprocess(self._im, self._rgb_mean)
def compute_inception_score(z, y_fake, x_fake, x, y, args):
    """Compute the Inception Score over ``args.max_iter`` generated batches.

    Each batch is generated from random latent codes and class labels,
    preprocessed, and classified through ``x`` -> ``y``. The score is
    ``exp(mean_x KL(p(y|x) || p(y)))``.
    """
    target_size = (args.image_size, args.image_size)
    batch_predictions = []

    for step in range(args.max_iter):
        logger.info("Compute at {}-th batch".format(step))

        # Generate
        z.d = np.random.randn(args.batch_size, args.latent)
        y_fake.d = generate_random_class(args.n_classes, args.batch_size)
        x_fake.forward(clear_buffer=True)

        # Predict
        generated = preprocess(x_fake.d.copy(), target_size, args.nnp_preprocess)
        x.d = generated
        y.forward(clear_buffer=True)
        batch_predictions.append(y.d.copy())

    p_yx = np.concatenate(batch_predictions)

    # Score
    p_y = np.mean(p_yx, axis=0)
    log_ratio = np.log(p_yx) - np.log(p_y)
    kld = np.sum(p_yx * log_ratio, axis=1)
    return np.exp(np.mean(kld))
def run_pipeline(iF):
    """Train and score the classifier on the MEC cells of one recording.

    Saves a plot and a ``_scores.npy`` file under ``F:\\temp\\classifier_out``.
    Returns early when the file has no anatomy or no matching cells; any
    exception is printed and swallowed.
    """
    try:
        print('Now working on '+ iF)
        dataset = preprocess(lm.loadmat(iF))

        if 'anatomy' not in dataset.keys():
            return
        anatomy = dataset['anatomy']

        # Prefer the shifted parent labels when they are available.
        group_key = 'parent_shifted' if 'parent_shifted' in anatomy else 'cluster_parent'
        group = anatomy[group_key]

        region = 'MEC'
        in_region = np.array([region in label for label in group])
        # Keep only the cells whose 'cgs' value is 2.
        in_region = in_region[dataset['sp']['cgs']==2]
        if in_region.sum()==0:
            return

        dataset['spikecount'] = dataset['spikecount'][:, in_region]
        (model, bl_scores) = eval_and_train(dataset)
        (Ypred, Ytrue, speed, trial, c_matrix) = score_gain_model(model, dataset)

        plt.plot(Ytrue)
        plt.plot(dataset['posx_centers'][Ypred-1])

        name = os.path.basename(iF)[0:-4]
        out_prefix = 'F:\\temp\\classifier_out\\' + region + '_' + name
        plt.savefig(out_prefix + '.png')
        plt.close()

        scores = np.array([Ypred, Ytrue, speed, trial, dataset['posx_edges']])
        np.save(out_prefix + '_scores.npy', scores)
    except Exception as e:
        print(str(e))
        print('not working')
# Load the bug reports.
df = pd.read_csv('bugs-2019-11-25.csv')

# Build the target label from product and component, keeping only labels
# that occur more than 50 times.
df['target'] = df[['Product', 'Component']].apply(' -- '.join, axis=1)
df = df.groupby('target').filter(lambda group: len(group) > 50)
df['target'] = df['target'].astype('category')
df['target_labels'] = df['target'].cat.codes

# Report how many summaries are missing.
print(f"Number of missing comments in comment text: {df['Summary'].isnull().sum()}")

# Explore categories
explore(df['target'], 40)

# Preprocess the Summary column, showing it before and after.
print(f"Summary column before preprocessing:\n{df['Summary'].head()}")
df['Summary'] = preprocess(df['Summary'])
print(f"Summary column after preprocessing:\n{df['Summary'].head()}")

# Features are the text columns joined into one string per row.
X = df[['Summary', 'Reporter', 'Assignee', 'OS']].apply(' '.join, axis=1)
y = df['target_labels']

# Split dataset into train and test data
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=123,
    shuffle=True,
)
print(f"Train dataset shape: {X_train.shape, y_train.shape}")
print(f"Test dataset shape: {X_test.shape, y_test.shape}")

# TF-IDF vectorizer with default settings, fitted on the training split.
vectorizer = TfidfVectorizer()
X_train_vectors = vectorizer.fit_transform(X_train)
def main():
    """Fetch the phone specs page, extract its spec sections and print them."""
    page = preprocess(getsoup(PHONE_SPECS_PAGE))
    sections = page.find_all(class_="techspecs-section")
    print(getspecs(sections))
def populateInNetworkFeedGraphSpatialMix(self, renderingScene, shuffle=True, imageSize=512, useSpatialMix=True):
    """Build the tf.data input graph, optionally mixing materials spatially.

    Reads the files in ``self.pathList``, optionally blends consecutive pairs
    of materials with a thresholded Perlin-noise mask, renders one input
    image per material, crops to ``self.tileSize`` and stores the resulting
    tensors on the instance (``inputBatch``, ``targetBatch``, ``iterator``,
    ``pathBatch``, ``stepsPerEpoch``, ``gammaCorrectedInputsBatch``).

    Args:
        renderingScene: Forwarded to ``helpers.generateInputRenderings``.
        shuffle: Shuffle the dataset (buffer of 16) when true.
        imageSize: Side length used for the noise mask, the surface array
            and the crop computation.
        useSpatialMix: Pull twice the batch size and blend pairs when true.
    """
    # NOTE(review): the original indentation was lost; the whole body is
    # assumed to sit inside the "load_images" name scope -- TODO confirm.
    with tf.name_scope("load_images"):
        #Create a tensor out of the list of paths
        filenamesTensor = tf.constant(self.pathList)
        #Each dataset element is one slice (one path entry) of that tensor
        dataset = tf.data.Dataset.from_tensor_slices(filenamesTensor)
        #For each slice apply the __readImagesGT function
        # NOTE(review): int(cpu_count() / 4) is 0 on machines with fewer than
        # 4 CPUs -- confirm that num_parallel_calls accepts that value.
        dataset = dataset.map(self.__readImagesGT, num_parallel_calls=int(
            multiprocessing.cpu_count() / 4))
        if shuffle:
            dataset = dataset.shuffle(buffer_size=16, reshuffle_each_iteration=True)
        #Authorize repetition of the dataset when one epoch is over.
        dataset = dataset.repeat()
        #Set batch size: spatial mixing consumes two materials per output
        toPull = self.batchSize
        if useSpatialMix:
            toPull = self.batchSize * 2
        batched_dataset = dataset.batch(toPull)
        batched_dataset = batched_dataset.prefetch(buffer_size=4)
        #Create an iterator to be initialized
        iterator = batched_dataset.make_initializable_iterator()
        #Create the node to retrieve next batch
        paths_batch, targets_batch = iterator.get_next()
        inputRealSize = imageSize #Should be input image size but changed tmp
        if useSpatialMix:
            #Binary mask from Perlin noise, expanded with a leading and a trailing axis
            threshold = 0.5
            perlinNoise = tf.expand_dims(tf.expand_dims(
                helpers.generate_perlin_noise_2d(
                    (inputRealSize, inputRealSize), (1, 1)), axis=-1), axis=0)
            perlinNoise = (perlinNoise + 1.0) * 0.5
            perlinNoise = perlinNoise >= threshold
            perlinNoise = tf.cast(perlinNoise, tf.float32)
            inverted = 1.0 - perlinNoise
            #Even-indexed materials where the mask is 1, odd-indexed ones elsewhere
            materialsMixed1 = targets_batch[::2] * perlinNoise
            materialsMixed2 = targets_batch[1::2] * inverted
            fullSizeMixedMaterial = materialsMixed1 + materialsMixed2
            targets_batch = fullSizeMixedMaterial
            #Keep the path of the first material of each pair
            paths_batch = paths_batch[::2]
        targetstoRender = helpers.target_reshape(
            targets_batch
        ) #reshape it to be compatible with the rendering algorithm [?, size, size, 12]
        nbRenderings = 1
        rendererInstance = renderer.GGXRenderer(includeDiffuse=True)
        ## Do renderings of the mixedMaterial
        # NOTE(review): mixedMaterial is never read afterwards, so the result
        # of adaptRougness is not used on this path -- confirm whether it was
        # meant to feed the rendering.
        mixedMaterial = helpers.adaptRougness(targetstoRender)
        targetstoRender = helpers.preprocess(
            targetstoRender) #Put targets to -1; 1
        surfaceArray = helpers.generateSurfaceArray(inputRealSize)
        #The two False arguments sit where other call sites pass jitterLightPos / jitterViewPos
        inputs_batch = helpers.generateInputRenderings(
            rendererInstance,
            targetstoRender,
            self.batchSize,
            nbRenderings,
            surfaceArray,
            renderingScene,
            False,
            False,
            self.useAmbientLight,
            useAugmentationInRenderings=self.useAugmentationInRenderings)
        targets_batch = helpers.target_deshape(targetstoRender,
                                               self.nbTargetsToRead)
        #Keep the renderings as they are before the gamma correction below
        self.gammaCorrectedInputsBatch = tf.squeeze(inputs_batch, [1])
        #tf.summary.image("GammadInputs", helpers.convert(inputs[0, :]), max_outputs=5)
        inputs_batch = tf.pow(inputs_batch, 2.2) # correct gamma
        if self.logInput:
            inputs_batch = helpers.logTensor(inputs_batch)
        #Do the random crop; if the crop is fixed, crop in the middle
        if inputRealSize > self.tileSize:
            if self.fixCrop:
                xyCropping = (inputRealSize - self.tileSize) // 2
                xyCropping = [xyCropping, xyCropping]
            else:
                #A single random offset, used for both spatial axes below
                xyCropping = tf.random_uniform([1],
                                               0,
                                               inputRealSize - self.tileSize,
                                               dtype=tf.int32)
            inputs_batch = inputs_batch[:, :,
                                        xyCropping[0]:xyCropping[0] + self.tileSize,
                                        xyCropping[0]:xyCropping[0] + self.tileSize, :]
            targets_batch = targets_batch[:, :,
                                          xyCropping[0]:xyCropping[0] + self.tileSize,
                                          xyCropping[0]:xyCropping[0] + self.tileSize, :]
        #Set shapes
        inputs_batch = tf.squeeze(
            inputs_batch, [1]
        ) #Before this the input has a useless dimension in 1 as we have only 1 rendering
        inputs_batch.set_shape([None, self.tileSize, self.tileSize, 3])
        targets_batch.set_shape(
            [None, self.nbTargetsToRead, self.tileSize, self.tileSize, 3])
        #Populate the object
        # NOTE(review): with useSpatialMix each step pulls 2 * batchSize
        # files, while this count divides by batchSize -- confirm intent.
        self.stepsPerEpoch = int(
            math.floor(len(self.pathList) / self.batchSize))
        self.inputBatch = inputs_batch
        self.targetBatch = targets_batch
        self.iterator = iterator
        self.pathBatch = paths_batch
# Load the forest fires data.
df = pd.read_csv(r"data\forestfires.csv", parse_dates=[], index_col=[])

# Per-column overview: dtype and the fraction of distinct values.
column_overview = pd.concat([df.dtypes, df.nunique() / len(df)], axis=1)
column_overview = column_overview.rename(
    {0: "dtype", 1: "proportion unique"}, axis=1)
print(column_overview.sort_values(["dtype", "proportion unique"]))

ENCODE = True
CATEGORIZE = True

X, y = preprocess(df, False, True, False)

# Distribution of the target.
sns.kdeplot(y)
plt.title("KDE distribution")
plt.show()

SEED = 0
SAMPLE_SIZE = 10000

# Train / validation split.
Xt, Xv, yt, yv = train_test_split(X, y, random_state=SEED)
dt = lgb.Dataset(Xt, yt, free_raw_data=False)

# Sample of the training rows, drawn from the training index.
np.random.seed(SEED)
sample_idx = np.random.choice(Xt.index, size=SAMPLE_SIZE)
Xs = Xt.loc[sample_idx]
ys = yt.loc[sample_idx]
ds = lgb.Dataset(Xs, ys)

dv = lgb.Dataset(Xv, yv, free_raw_data=False)
def main():
    """Load the page at ``PATH``, clean it and print the extracted specs."""
    cleaned = preprocess(getsoup(PATH))
    print(getspecs(cleaned))
def test_all(self):
    """End-to-end check: preprocess -> algorithm -> postprocess.

    The preprocessed messages and customer are fed into ``algorithm`` so the
    stages are tested chained together, not against stored constants.
    """
    text = read_from_file('../input.txt')
    messages, customer = preprocess(text)
    suggestions = algorithm(messages, customer)
    postprocessed = postprocess(suggestions, customer)
    self.assertEqual(postprocessed, POSTPROCESSED_TEXT)
def get_wiki_json(title):
    """Return the parsed sections of a wiki page plus its preprocessed text."""
    page = wiki.page(title)
    result = parse_sections(page, content=page.text)
    result["text"] = preprocess(page.text)
    return result
import lightgbm as lgb from helpers import preprocess import pandas as pd from pathlib import Path import matplotlib.pyplot as plt df = pd.read_csv( r"data\forestfires.csv", parse_dates=[], index_col=[], ) X, y = preprocess(df, encode=False, categorize=True, preran=False) X = X.drop("rain", axis=1) d = lgb.Dataset(X, y, silent=True) # rmse: 98.18188205858038 NUM_BOOST_ROUND = 455 params = { "objective": "rmse", "metric": "rmse", "verbose": -1, "n_jobs": 6, "learning_rate": 0.004090619790710353, "feature_pre_filter": False, "lambda_l1": 6.99239231800302e-08, "lambda_l2": 9.330959145992983, "num_leaves": 9, "feature_fraction": 0.8999999999999999, "bagging_fraction": 1.0, "bagging_freq": 0, "min_child_samples": 20,
parse_dates=[], index_col=[], delimiter=";") print( pd.concat([df.dtypes, df.nunique() / len(df)], axis=1).rename({ 0: "dtype", 1: "proportion unique" }, axis=1).sort_values(["dtype", "proportion unique"])) ENCODE = False CATEGORIZE = True y = df["Divorce"] df = df.drop("Divorce", axis=1) X = preprocess(df, ENCODE, 5, True) sns.kdeplot(y) plt.title("KDE distribution") plt.show() SEED = 0 SAMPLE_SIZE = 10000 Xt, Xv, yt, yv = train_test_split( X, y, random_state=SEED) # split into train and validation set dt = lgb.Dataset(Xt, yt, free_raw_data=False) np.random.seed(SEED) sample_idx = np.random.choice(Xt.index, size=SAMPLE_SIZE) Xs, ys = Xt.loc[sample_idx], yt.loc[sample_idx] ds = lgb.Dataset(Xs, ys) dv = lgb.Dataset(Xv, yv, free_raw_data=False)
def main():
    """Load ``SPEC_PAGE``, clean it and run the spec extraction on it."""
    getspecs(preprocess(getsoup(SPEC_PAGE)))
def main():
    """Extract markups from the validation reports and print a preview.

    Loads the modifier/target lexicons, the validation split of the source
    dataframe and the reference standard, runs ``extract_markups_from_text``
    on every ``(note_name, text)`` pair in a worker pool, collects the
    results in a dataframe, prints its head and exits.
    """
    modifiers = itemData.instantiateFromCSVtoitemData(MODIFIERS_FILE)
    targets = itemData.instantiateFromCSVtoitemData(TARGETS_FILE)

    df = pd.read_pickle(SOURCE_DF)
    df = df[df.train_val == 'val']
    print(df.head())
    print(len(df))
    #df = df.iloc[:10]

    ref = pd.read_excel(REFERENCE_STANDARD)
    ref = update_reference_df(ref)

    reports = list(zip(df['note_name'], df['text']))

    # starmap hands all tasks to the workers at once; calling pool.apply in
    # a loop blocks on every task and leaves the other workers idle.
    pool = Pool(processes=8)
    try:
        list_of_classified_markups = pool.starmap(
            extract_markups_from_text,
            [(name_and_text, targets, modifiers) for name_and_text in reports])
    finally:
        # Release the workers even when a task raised.
        pool.close()
        pool.join()

    classified_markups = pd.DataFrame(
        columns=['m', 'doc_span', 'markup_class', 'text']).append(
            list_of_classified_markups)
    print(classified_markups.head())
    exit()

    # NOTE: everything below is unreachable because of the exit() above. It
    # is kept as the author's work in progress and refers to names that are
    # not defined in this function (list_of_markups, sentence_span_pairs).
    ##PICK up here
    classified_markups = [{
        'm': m,
        'doc_span': m.docSpan,
        'markup_class': m.markup_class,
        'text': m.text
    } for m in list_of_markups]

    # TODO: Make this one long dataframe, like classified_markups
    df['markups'] = df.apply(
        lambda row: extract_markups_from_text(row.text, targets, modifiers),
        axis=1)
    print(df.head())

    classified_markups = pd.DataFrame(
        columns=['m', 'doc_span', 'markup_class', 'text'])
    for idx, row in df.iterrows():
        # Get all annotations from reference standard with this report name
        #annotations = ref[ref['File Name with extension'] == row.note_name]
        row_markups = classify_markups(row.markups, row.note_name)
        print(classified_markups)
        #if classified_markups
        classified_markups = classified_markups.append(row_markups,
                                                       ignore_index=True)
    print(len(classified_markups))
    print(classified_markups.head())
    evaluate_markups(ref, classified_markups)
    exit()

    reports = list(df[df.train_val == 'train']['text'])
    reports = [helpers.preprocess(report) for report in reports]
    split_reports = [
        helpers.my_sentence_splitter(report) for report in reports
    ]
    markups = []
    for report in split_reports[:10]:
        # Each report is a list of sentence span pairs
        for text, span in report:
            m = create_markup(s=text,
                              modifiers=modifiers,
                              targets=targets,
                              span=span)
            markups.append(m)
    print(markups)
    exit()

    markups = [
        create_markup(s=sentence,
                      modifiers=modifiers,
                      targets=targets,
                      span=span)
        for (sentence, span) in sentence_span_pairs
    ]
    report_names = list(set(df.note_name))
    for report in report_names:
        report_df = df[df.note_name == report]
        evaluate_report(report_df)
def __renderInputs(self, materials, renderingScene, jitterLightPos, jitterViewPos, mixMaterials, isTest, renderSize):
    """Render the input images for a batch of materials and crop both.

    Optionally blends consecutive material pairs, renders ``nbRenderings``
    images per material, cuts inputs and targets with
    ``helpers.cutSidesOut``, then gamma-corrects, optionally logs and
    preprocesses the inputs. The renderings before gamma correction are
    stored in ``self.gammaCorrectedInputsBatch`` and added to the summaries.

    Args:
        materials: Batch of material maps to render.
        renderingScene: Forwarded to ``helpers.generateInputRenderings``.
        jitterLightPos: Forwarded to ``helpers.generateInputRenderings``.
        jitterViewPos: Forwarded to ``helpers.generateInputRenderings``.
        mixMaterials: Blend even- and odd-indexed materials when true.
        isTest: Not read by the active code (only by commented-out code).
        renderSize: Side length used for the static shape and surface array.

    Returns:
        Tuple ``(targets, inputs)``.
    """
    mixedMaterial = materials
    if mixMaterials:
        #Blend even-indexed with odd-indexed materials using a random weight in [0.1, 0.9)
        alpha = tf.random_uniform([1], minval=0.1, maxval=0.9, dtype=tf.float32, name="mixAlpha")
        #print("mat2: " + str(materials2))
        materials1 = materials[::2]
        materials2 = materials[1::2]
        mixedMaterial = helpers.mixMaterials(materials1, materials2, alpha)
    mixedMaterial.set_shape(
        [None, self.nbTargetsToRead, renderSize, renderSize, 3])
    mixedMaterial = helpers.adaptRougness(mixedMaterial)
    #These 3 lines below tries to scale the albedos to get more variety and to randomly flatten the normals to disambiguate the normals and albedos. We did not see strong effect for these.
    #if not isTest and self.useAugmentationInRenderings:
    #    mixedMaterial = helpers.adaptAlbedos(mixedMaterial, self.batchSize)
    #    mixedMaterial = helpers.adaptNormals(mixedMaterial, self.batchSize)
    reshaped_targets_batch = helpers.target_reshape(
        mixedMaterial
    ) #reshape it to be compatible with the rendering algorithm [?, size, size, 12]
    nbRenderings = self.maxInputToRead
    if not self.fixImageNb:
        #If we don't want a constant number of input images, we randomly select a number of input images between 1 and the maximum number of images defined by the user.
        nbRenderings = tf.random_uniform([1], 1, self.maxInputToRead + 1, dtype=tf.int32)[0]
    rendererInstance = renderer.GGXRenderer(includeDiffuse=True)
    ## Do renderings of the mixedMaterial
    targetstoRender = reshaped_targets_batch
    pixelsToAdd = 0
    targetstoRender = helpers.preprocess(
        targetstoRender) #Put targets to -1; 1
    surfaceArray = helpers.generateSurfaceArray(
        renderSize, pixelsToAdd
    ) #Generate a grid Y,X between -1;1 to act as the pixel support of the rendering (computer the direction vector between each pixel and the light/view)
    #Do the renderings
    inputs = helpers.generateInputRenderings(
        rendererInstance,
        targetstoRender,
        self.batchSize,
        nbRenderings,
        surfaceArray,
        renderingScene,
        jitterLightPos,
        jitterViewPos,
        self.useAmbientLight,
        useAugmentationInRenderings=self.useAugmentationInRenderings)
    #inputs = [helpers.preprocess(input) for input in inputs]
    #Default: no jitter, every rendering gets a zero crop offset
    randomTopLeftCrop = tf.zeros([self.batchSize, nbRenderings, 2], dtype=tf.int32)
    averageCrop = 0.0
    #If we want to jitter the renderings around (to try to take into account small non alignment), we should handle the material crop a bit differently
    #We didn't really manage to get satisfying results with the jittering of renderings. But the code could be useful if this is of interest to Ansys.
    if self.jitterRenderings:
        randomTopLeftCrop = tf.random_normal(
            [self.batchSize, nbRenderings, 2], 0.0,
            1.0) #renderSize - self.cropSize, dtype=tf.int32)
        # NOTE(review): the scale factor has shape [batchSize] and is
        # multiplied with a [batchSize, nbRenderings, 2] tensor; broadcasting
        # aligns trailing axes, so this looks like it only works for some
        # batch sizes -- TODO confirm the intended per-sample scaling.
        randomTopLeftCrop = randomTopLeftCrop * tf.exp(
            tf.random_normal(
                [self.batchSize], 0.0,
                1.0)) #renderSize - self.cropSize, dtype=tf.int32)
        #Centre the offsets over the renderings axis, then round to whole pixels
        randomTopLeftCrop = randomTopLeftCrop - tf.reduce_mean(
            randomTopLeftCrop, axis=1, keep_dims=True)
        randomTopLeftCrop = tf.round(randomTopLeftCrop)
        randomTopLeftCrop = tf.cast(randomTopLeftCrop, dtype=tf.int32)
        #Shift by half the maximum jitter and clamp to [0, maxJitteringPixels]
        averageCrop = tf.cast(self.maxJitteringPixels * 0.5, dtype=tf.int32)
        randomTopLeftCrop = randomTopLeftCrop + averageCrop
        randomTopLeftCrop = tf.clip_by_value(randomTopLeftCrop, 0,
                                             self.maxJitteringPixels)
    totalCropSize = self.cropSize
    inputs, targets = helpers.cutSidesOut(inputs, targetstoRender,
                                          randomTopLeftCrop, totalCropSize,
                                          self.firstAsGuide, averageCrop)
    print("inputs shape after" + str(inputs.get_shape()))
    #Keep the renderings as they are before the gamma correction below
    self.gammaCorrectedInputsBatch = inputs
    tf.summary.image("GammadInputs",
                     helpers.convert(inputs[0, :]),
                     max_outputs=5)
    inputs = tf.pow(inputs, 2.2) # correct gamma
    if self.logInput:
        inputs = helpers.logTensor(inputs)
    inputs = helpers.preprocess(inputs)
    targets = helpers.target_deshape(targets, self.nbTargetsToRead)
    return targets, inputs
def test_preprocessing(self):
    """``preprocess`` must return the expected messages and customer."""
    actual_messages, actual_customer = preprocess(INPUT_TEXT)
    self.assertEqual(actual_messages, PREPROCESSED_MESSAGES)
    self.assertEqual(actual_customer, CUSTOMER)
def app():
    """Render the filterable zip-code map of residential tax delinquencies.

    The user picks a delinquency metric to display and a census metric with a
    slider threshold; only zip codes passing the census filter are drawn.
    """
    _max_width_()
    main_df = preprocess()
    acs_df = preprocess_acs()

    st.title(
        'Filterable Philly Zip Code Map of Residential Tax Delinquencies and Census Metrics'
    )
    st.write(
        'Interactive breakdown of total accounts and principal due for actionable delinquent residential accounts in Philly, which can be filterd to show areas by levels of income, poverty, and unemployment from the census.'
    )
    st.write(
        'Use the dropdown menu to select a given delinquency metric to be displayed on the map, then a census metric to filter the map. Use the resulting slider to select the census metric threshold determining which zip codes to display. Hover over an area to view the corresponding metric value and zip code number. While far from perfectly correlated, zip codes with larger total number of delinquent accounts and total principal due tend to have higher poverty levels and unemployment rates and lower median incomes.'
    )

    c1, c2 = st.beta_columns(2)
    metric = c1.selectbox('Select Delinquency Metric',
                          ('Total Accounts', 'Total Principal Due'))
    demo = c1.selectbox('Select Census Metric', [
        'Households Median Income', 'Percent Below Poverty',
        'Unemployment Rate'
    ])

    # Census column names are the menu labels with underscores for spaces.
    census_column = demo.replace(' ', '_')

    # Income keeps zip codes below the threshold; the rates keep those above.
    if demo == 'Households Median Income':
        threshold = c1.slider('Households Median Income Below', 20000,
                              110000, 110000, 1000)
        acs_df = acs_df[acs_df[census_column] < threshold]
    elif demo == 'Percent Below Poverty':
        threshold = c1.slider('Percent Below Poverty Above', 5, 50, 5, 1)
        acs_df = acs_df[acs_df[census_column] > threshold]
    else:
        threshold = c1.slider('Unemployment Rate Above', 2, 20, 2, 1)
        acs_df = acs_df[acs_df[census_column] > threshold]

    allowed_zips = acs_df['Zip_Code'].tolist()

    philly = (40.00, -75.16)
    zips_geo = 'Zipcodes_Poly.geojson'
    with open(zips_geo) as geo_file:
        zips_data = json.load(geo_file)

    by_zip = filter_zip(main_df, metric)
    by_zip = by_zip[by_zip.index.isin(allowed_zips)]
    values = by_zip.values.tolist()
    zip_labels = [str(int(code)) for code in by_zip.index.tolist()]

    choropleth = go.Choroplethmapbox(geojson=zips_data,
                                     z=values,
                                     locations=zip_labels,
                                     featureidkey="properties.CODE",
                                     colorscale='YlOrRd')
    map_fig = go.FigureWidget(choropleth)

    centre = {"lat": philly[0], "lon": philly[1]}
    map_fig.update_layout(mapbox_style="carto-positron",
                          mapbox_zoom=9,
                          mapbox_center=centre)
    no_margin = {"r": 0, "t": 0, "l": 0, "b": 0}
    map_fig.update_layout(margin=no_margin, height=600, width=540)

    c2.plotly_chart(map_fig)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import helpers

# The raw capture is the input; file.csv is produced from it below. Check the
# input, not the output, otherwise a first run can never get past this point.
if not os.path.exists('raw.csv'):
    print('error: No raw.csv file found!!')
    raise SystemExit(-1)

# A bit of preprocessing of the input file...
# just simple formatting: raw.csv is rewritten as file.csv, with removeList
# passed to the helper.
removeList = [0, 1, 2, 3, 4, 37, 38, 39, 40, 41]
helpers.preprocess('raw.csv', 'file.csv', removeList)

# import data from file.csv as pandas dataframe
data = pd.read_csv('./file.csv', names=[0, 1, 2])

clk = data[data.columns[2]]  # get clock data from the DataFrame
clk = list(clk)  # and convert to python list

data = data[data.columns[0]]  # get transmitted data
data = list(data)

# define time interval where data is defined: one index per sample
time = list(range(len(data)))

samplingFreq = 50  # sampling frequency for modulated analog output
def match(args):
    """Find the real images closest in feature space to one generated image.

    A single image of class ``args.class_id`` is generated, its features are
    compared (squared L2 distance) against those of every real image served
    by the data iterator, and the ``args.top_n`` nearest real images are
    saved individually and, together with the generated image, as one tile.

    Args:
        args: Namespace providing the device settings, generator settings,
            model paths, data paths, ``class_id``, ``top_n``,
            ``variable_name``, ``nnp_preprocess`` and ``monitor_path``.
    """
    # Context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)

    # Args
    latent = args.latent
    maps = args.maps
    batch_size = 1
    image_size = args.image_size
    n_classes = args.n_classes
    not_sn = args.not_sn
    threshold = args.truncation_threshold

    # Model (SAGAN)
    nn.load_parameters(args.model_load_path)
    z = nn.Variable([batch_size, latent])
    y_fake = nn.Variable([batch_size])
    x_fake = generator(z, y_fake, maps=maps, n_classes=n_classes, test=True, sn=not_sn)\
        .apply(persistent=True)

    # Model (Inception model) from nnp file
    nnp = NnpLoader(args.nnp_inception_model_load_path)
    x, h = get_input_and_output(nnp, batch_size, args.variable_name)

    # DataIterator for a given class_id
    di = data_iterator_imagenet(args.train_dir,
                                args.dirname_to_label_path,
                                batch_size=batch_size,
                                n_classes=args.n_classes,
                                noise=False,
                                class_id=args.class_id)

    # Monitor
    monitor = Monitor(args.monitor_path)
    name = "Matched Image {}".format(args.class_id)
    monitor_image = MonitorImage(name,
                                 monitor,
                                 interval=1,
                                 num_images=batch_size,
                                 normalize_method=lambda x: (x + 1.) / 2. * 255.)
    name = "Matched Image Tile {}".format(args.class_id)
    monitor_image_tile = MonitorImageTile(name,
                                          monitor,
                                          interval=1,
                                          num_images=batch_size + args.top_n,
                                          normalize_method=lambda x: (x + 1.) / 2. * 255.)

    # Generate and p(h|x).forward
    # generate
    z_data = resample(batch_size, latent, threshold)
    y_data = generate_one_class(args.class_id, batch_size)
    z.d = z_data
    y_fake.d = y_data
    x_fake.forward(clear_buffer=True)

    # p(h|x).forward
    x_fake_d = x_fake.d.copy()
    x_fake_d = preprocess(x_fake_d, (image_size, image_size), args.nnp_preprocess)
    x.d = x_fake_d
    h.forward(clear_buffer=True)
    h_fake_d = h.d.copy()

    # Feature matching
    norm2_list = []
    x_data_list = []
    x_data_list.append(x_fake.d)  # index 0 is the generated image
    for i in range(di.size):
        # forward for real data
        x_d, _ = di.next()
        x_data_list.append(x_d)
        x_d = preprocess(x_d, (image_size, image_size), args.nnp_preprocess)
        x.d = x_d
        h.forward(clear_buffer=True)
        h_real_d = h.d.copy()

        # norm computation
        axis = tuple(np.arange(1, len(h.shape)).tolist())
        norm2 = np.sum((h_real_d - h_fake_d) ** 2.0, axis=axis)
        norm2_list.append(norm2)

    # Save top-n images
    # batch_size is 1, so each entry holds one distance; flatten them so
    # argsort ranks the real images against each other.
    if norm2_list:
        distances = np.concatenate([np.ravel(norm2) for norm2 in norm2_list])
    else:
        distances = np.empty(0)
    top_n = min(args.top_n, len(distances))
    nearest = np.argsort(distances)[:top_n]
    # +1: real image j sits at x_data_list[j + 1], after the generated image.
    top_images = [x_data_list[j + 1] for j in nearest]
    for i, image in enumerate(top_images):
        monitor_image.add(i, image)

    # Tile: the generated image followed by its nearest real images.
    matched_images = np.concatenate([x_data_list[0]] + top_images)
    monitor_image_tile.add(0, matched_images)
# Keep prompting until the user enters an integer between 0 and 7.
model = -1
while not 0 <= model <= 7:
    try:
        model = int(input("Enter a valid number: "))
    except ValueError:
        model = -1

# Load the train and test data
print("Loading the data...")
y_tr, input_data_train, _ = load_csv_data("data/train.csv")
y_te, input_data_test, ids_test = load_csv_data("data/test.csv")

# Preprocess train and test data
print("Preprocessing the data...")
tx_tr = preprocess(input_data_train)
tx_te = preprocess(input_data_test)

# Compute the optimal weights, starting from an all-zero weight vector
print("Computing the optimal weights...")
losses, optimal_weights = choose_model(
    y_tr,
    tx_tr,
    models[model],
    np.zeros(tx_tr.shape[1]),
    500,
    2e-6,
    0.0008,
)

print("Test accuracy: ", compute_accuracy(y_te, tx_te, optimal_weights))
print("Training accuracy: ", compute_accuracy(y_tr, tx_tr, optimal_weights))

# Predict the test labels and write the submission file
y_pred = predict_labels(optimal_weights, tx_te)
create_csv_submission(ids_test, y_pred, "submission.csv")
def app():
    """Render the page with two zip-code choropleths side by side.

    The left column shows the selected delinquency metric and the right
    column the selected census metric, both drawn over the same zip-code
    GeoJSON and centred on the same coordinates.
    """
    center_lat, center_lon = 40.00, -75.16

    def zip_choropleth(geojson, values, zip_codes):
        # Both maps share every setting except the values and locations plotted.
        figure = go.FigureWidget(
            go.Choroplethmapbox(geojson=geojson,
                                z=values,
                                locations=zip_codes,
                                featureidkey="properties.CODE",
                                colorscale='YlOrRd'))
        figure.update_layout(mapbox_style="carto-positron",
                             mapbox_zoom=9,
                             mapbox_center={"lat": center_lat,
                                            "lon": center_lon},
                             margin={"r": 0, "t": 0, "l": 0, "b": 0},
                             height=600,
                             width=540)
        return figure

    _max_width_()
    st.title(
        'Side-by-Side Philly Zip Code Map of Residential Tax Delinquencies and Census Metrics'
    )
    st.write(
        'Interactive breakdown of total accounts and principal due for actionable delinquent residential accounts in Philly, side-by-side with income, poverty, and unemployment data from the census.'
    )
    st.write(
        'Use the dropdown menus to select a given delinquency and census metric associated with the corresponding map. Hover over an area to view the corresponding metric value and zip code number. While far from perfectly correlated, zip codes with larger total number of delinquent accounts and total principal due tend to have higher poverty levels and unemployment rates and lower median incomes.'
    )

    main_df = preprocess()
    left_col, right_col = st.beta_columns(2)

    # Left column: delinquency metric aggregated per zip code.
    metric = left_col.selectbox('Select Delinquency Metric',
                                ('Total Accounts', 'Total Principal Due'))
    with open('Zipcodes_Poly.geojson') as geo_file:
        zips_data = json.load(geo_file)
    by_zip = filter_zip(main_df, metric)
    delinquency_fig = zip_choropleth(
        zips_data,
        by_zip.values.tolist(),
        [str(int(code)) for code in by_zip.index.tolist()],
    )
    left_col.plotly_chart(delinquency_fig)

    # Right column: census metric; the column name is the label with
    # spaces replaced by underscores.
    acs_df = preprocess_acs()
    acs_metric = right_col.selectbox('Select Census Metric',
                                     ('Households Median Income',
                                      'Percent Below Poverty',
                                      'Unemployment Rate'))
    census_fig = zip_choropleth(
        zips_data,
        acs_df[acs_metric.replace(' ', '_')],
        acs_df['Zip_Code'],
    )
    right_col.plotly_chart(census_fig)
def main():
    """Print the specs extracted from the preprocessed SPEC_PAGE soup."""
    page = preprocess(getsoup(SPEC_PAGE))
    # Only the elements carrying both spec-wrapper classes are handed on.
    spec_nodes = page.find_all(class_="tech_spec_wrap spec_toggle")
    print(getspecs(spec_nodes))
def predict():
    """Serve the input form, or classify a submitted headline or URL.

    Any request that is not a POST gets the input form. A POST reads the
    ``headline`` form field and then:

    * more than one URL found in it -> apology page;
    * exactly one URL -> each headline scraped from that page is classified
      and the clickbait proportion and counts are shown;
    * otherwise the text is treated as a single headline, which must be at
      least four words long, and its prediction is shown.

    Returns:
        The rendered template for whichever of the cases above applies.
    """
    # Everything except a form submission gets the input page. Testing for
    # "not POST" rather than "GET" keeps methods such as HEAD from falling
    # through the function and returning None.
    if request.method != "POST":
        return render_template("input.html")

    # A POST without the field would give None and break the string handling
    # below; treat it as an empty headline instead.
    headline = request.form.get("headline") or ""

    # Check whether the user entered a url (per the original note, a url is
    # defined as starting with http).
    urls = find_url(headline)
    if len(urls) > 1:
        message = "please only submit 1 url at a time"
        return render_template("apology.html", message=message)

    if urls:
        # Known failure modes noted by the original author: pages without
        # h1-h4 tags, and urls that return nothing at all. Both end up on the
        # apology page. Only Exception is caught so that SystemExit and
        # KeyboardInterrupt still propagate.
        try:
            # retrieve the urls using the function created in helpers.py
            prediction_dfs = [
                predict_on_html(get_html_series(url), model, tfidf)
                for url in urls
            ]
            clickbait_proportion = np.mean(
                [df.target.mean() for df in prediction_dfs])
            df = prediction_dfs[0]
            total_headlines = len(df)
            num_bait = len(df[df.target == 1])
            num_norm = len(df[df.target == 0])
            str_percentage = str(round((clickbait_proportion * 100), 0))
        except Exception:
            message = "Bait 'n' Switch was unable to parse the website you provided"
            return render_template("apology.html", message=message)

        return render_template("url_prediction.html",
                               proportion=(clickbait_proportion),
                               percentage=str_percentage,
                               total_headlines=total_headlines,
                               num_bait=num_bait,
                               num_norm=num_norm)

    # Check if the headline is at least 4 words long
    headline_length = len(headline.split())
    if headline_length <= 3:
        return render_template("too_short.html",
                               headline_length=str(headline_length))

    # Clean headline
    headline = preprocess(headline, length=0)
    # Convert the headline to a series
    headline_series = pd.Series(data=(headline), index=[0])
    # Use the prefit tfidf vectorizer to transform the headline
    headline_tfidf = tfidf.transform(headline_series)
    # Predict on the tfidf headline
    prediction = model.predict(headline_tfidf)
    # Send headline prediction to display
    return render_template("success.html",
                           headline=headline_series[0],
                           prediction=prediction)