class PositionAdapter:

    def __init__(self, tip_names=[
            "thumbTip_IK",
            "indexTip_IK",
            "middleTip_IK",
            "ringTip_IK",
            "littleTip_IK",
    ], arm_name="arm", hand_name="hand"):
        ctrl = bge.logic.getCurrentController()
        self._ob = ctrl.owner
        self._bone_channels = [self._ob.channels[tip] for tip in tip_names]
        self._bone_channels.append(self._ob.channels[arm_name])
        self._hand_name = hand_name
        self._hand_channel = self._ob.channels[hand_name]
        self._transformer = Transformer()

    def translate(self, matrix):
        # TODO: query the arm length instead of using a fixed value
        matrix[5][1] -= 2.41533  # offset the arm position by the arm length
        # translate fingers and arm
        for i in range(len(matrix) - 1):
            loc = matrix[i]
            self._transformer.translate(self._bone_channels[i], self._ob, loc)
        # rotate hand
        rotation = matrix[-1]
        self._transformer.rotate(self._hand_channel, rotation)
        # update
        self._ob.update()
def run_transform(self):
    """Runs the basic FFT demonstration"""
    name = self.box.get()
    try:
        worker = Transformer(name)
        worker.run()
        del worker, name
        gc.collect()
    except FileNotFoundError as er:
        print(er)
def run_show_primitives():
    """Demonstrates the FT images of primitive forms"""
    transformers = [
        Transformer('round.jpg'),
        Transformer('square.png'),
        Transformer('triangle.png')
    ]
    for trans in transformers:
        trans.plot()
        trans.transform().shift()
        trans.plot_fft('Shifted FT of')
    Transformer.show_all()
    transformers.clear()
    del transformers
    gc.collect()
def run_filter(self):
    """Demonstrates the work of the Filter class"""
    name = self.box.get()
    try:
        fraction = float(self.fracbox.get())
        fl1 = fl.Filter(name, fraction)
        fl1.plot(name)
        fl1.low_pass_filter()
        fl2 = fl.Filter(name, fraction)
        fl2.high_pass_filter()
        Transformer.show_all()
        del fl1, fl2, name
        gc.collect()
    except Exception as er:
        print(er)
def fit(self, Xmask, y):
    pr = prepare.Prepare_0(model=10, preproc=1, min_df=1,
                           use_svd=False, tfidf=2, stemmer=0)
    (X_all_df, _, BP, params) = pr.load_transform(update=False)
    names = list(X_all_df.columns)
    X_all = np.asarray(X_all_df)
    self.X_all, self.names = X_all, names

    clf0 = GaussianNB()
    clf1 = MultinomialNB(alpha=0.8)
    clf2 = BernoulliNB(alpha=1, binarize=0.01)
    clf = clf1
    self.rd = Pipeline([
        ("trans", Transformer(names=self.names, X_all=X_all, BP=BP)),
        # ("scaler", StandardScaler(with_mean=False)),
        ("est", clf)
    ])
    self.rd.fit(Xmask, np.asarray(y))
    return self
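The snippet above instantiates three naive Bayes variants and pipes the chosen one behind a feature Transformer. A self-contained, hedged sketch of the same pattern using stock scikit-learn pieces (toy documents and TfidfVectorizer stand in for the project's Transformer):

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

docs = ['spam spam ham', 'ham ham eggs', 'spam offer now', 'eggs and ham']
y = np.array([1, 0, 1, 0])
pipe = Pipeline([
    ('trans', TfidfVectorizer()),    # stand-in for the project's Transformer
    ('est', MultinomialNB(alpha=0.8)),
])
pipe.fit(docs, y)
print(pipe.predict_proba(['spam ham'])[:, 1])  # probability of class 1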
def fit(self, Xmask, y):
    pr = self._get_featureset()
    (X_all_df, _, BP, params) = pr.load_transform(update=False)
    names = list(X_all_df.columns)
    X_all = np.asarray(X_all_df)
    self.X_all, self.names = X_all, names
    logger.debug('Fit: use_stats=%s,use_table=%s,predict_bp=%s,use_scaler=%s',
                 self.use_stats, self.use_table, self.predict_bp, self.use_scaler)
    logger.debug('Fit: bst_bgram=%s,bst_minc=%s,bst_title=%s,bst_body=%s,bst_url=%s',
                 self.bst_bgram, self.bst_minc, self.bst_title, self.bst_body, self.bst_url)

    PipelineList = []
    PipelineList.append(
        ("trans", Transformer(names=self.names, use_best=self.use_best,
                              use_bp=self.use_bp, use_stats=self.use_stats,
                              use_table=self.use_table,
                              bst_bgram=self.bst_bgram, bst_minc=self.bst_minc,
                              bst_title=self.bst_title, bst_body=self.bst_body,
                              bst_url=self.bst_url,
                              predict_bp=self.predict_bp, bp_clfs=self.bp_clfs,
                              X_all=X_all, BP=BP))
    )
    if self.use_scaler > 0:
        PipelineList.append(
            ("scaler", StandardScaler(with_mean=(self.use_scaler > 1)))
        )
    self._pipeline_append(PipelineList)
    PipelineList.append(("est", self._get_clf(self.clf)))
    self.rd = Pipeline(PipelineList)
    logger.debug('Pipeline: %s', [(k, v.__class__.__name__) for k, v in PipelineList])
    logger.debug("Pipeline.estimator=%s", dict(PipelineList)['est'])
    self.rd.fit(Xmask, np.asarray(y))
    return self
def get_hottest_day_formatted():
    """
    Finds the hottest day from the two .csv data files, using the output
    from the Transformer. Prints a formatted result which is more
    convenient for the user.
    """
    transformer = Transformer()
    hottest_day = transformer.find_hottest_day()
    # rename the columns to more understandable names
    hottest_day.columns = ['Date', 'Temperature', 'Region']
    print(hottest_day.to_string(
        formatters={
            "Date": lambda x: "{:%d-%m-%Y}".format(pd.to_datetime(x))
        },
        index=False))
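For reference, a quick standalone check of the date formatter used above (the input string mirrors the test fixture elsewhere in this collection): pd.to_datetime returns a Timestamp, whose __format__ accepts strftime-style specs.

import pandas as pd

# prints '17-03-2016'
print("{:%d-%m-%Y}".format(pd.to_datetime('2016-03-17T00:00:00')))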
def fit(self, Xmask, y):
    pr = prepare.Prepare_0(model=14, n_components=512, preproc=1, min_df=1,
                           use_svd=True, tfidf=2, stemmer=0)
    (X_all_df, _, BP, params) = pr.load_transform(update=False)
    names = list(X_all_df.columns)
    X_all = np.asarray(X_all_df)
    self.X_all, self.names = X_all, names

    clf1 = lm.LogisticRegression(penalty='l2', dual=True, tol=0.00001, C=1,
                                 fit_intercept=True, intercept_scaling=1.0,
                                 class_weight=None, random_state=random_state)

    class LassoCV_proba(lm.LassoCV):

        def predict_proba(self, X):
            print('alpha_:', self.alpha_)
            y = self.predict(X)
            # squash the regression output into (0, 1) with a sigmoid
            y = 1. / (1 + np.exp(-(y - 0.5)))
            return np.vstack((1 - y, y)).T

    class RidgeCV_proba(lm.RidgeCV):

        def predict_proba(self, X):
            print('alpha_:', self.alpha_)
            y = self.predict(X)
            if 0:  # disabled alternative: min-max scaling
                y_min, y_max = y.min(), y.max()
                if y_max > y_min:
                    y = (y - y_min) / (y_max - y_min)
            else:
                y = 1. / (1 + np.exp(-(y - 0.5)))
            return np.vstack((1 - y, y)).T

    clf2 = RidgeCV_proba(alphas=np.linspace(0, 10), cv=4)
    clf3 = LassoCV_proba(alphas=None, cv=4)
    clf4 = svm.SVR(C=3, kernel='linear')
    clf = clf1
    self.rd = Pipeline([
        ("trans", Transformer(names=self.names, X_all=X_all, BP=BP)),
        # ("scaler", StandardScaler(with_mean=False)),
        # ("filter", lm.LogisticRegression(penalty='l1', dual=False, tol=0.0001,
        #                                  C=1, fit_intercept=True,
        #                                  intercept_scaling=1.0, class_weight=None,
        #                                  random_state=random_state)),
        ("est", clf)
    ])
    self.rd.fit(Xmask, np.asarray(y))
    return self
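The predict_proba shims above map a regressor's raw output into a two-column pseudo-probability matrix. A standalone sketch of the same squashing step (the sample values are made up):

import numpy as np

y_raw = np.array([0.1, 0.5, 0.9])        # hypothetical regression outputs
p1 = 1. / (1 + np.exp(-(y_raw - 0.5)))   # sigmoid centred at 0.5
proba = np.vstack((1 - p1, p1)).T        # shape (n_samples, 2), rows sum to 1
print(proba)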
def main():
    inp = alsaaudio.PCM(alsaaudio.PCM_CAPTURE)
    inp.setchannels(CHANNELS)
    inp.setrate(RATE)
    inp.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    inp.setperiodsize(CHUNK)

    out = alsaaudio.PCM(alsaaudio.PCM_PLAYBACK)
    out.setchannels(CHANNELS)
    out.setrate(RATE)
    out.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    out.setperiodsize(CHUNK + 1000)

    transformer = Transformer(pitch.pitch_up)
    while True:
        # read() returns (length, data); length may be negative on an
        # overrun, in which case data is empty and the chunk is skipped
        l, data = inp.read()
        if data:
            transformed = transformer.transform(data)
            out.write(transformed)
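Transformer here wraps a callable such as pitch.pitch_up that maps one raw S16_LE byte chunk to another. A minimal, hedged sketch of such a callback, an identity pass-through (the real pitch shifter is project-specific):

import numpy as np

def passthrough(data: bytes) -> bytes:
    # decode interleaved signed 16-bit little-endian samples;
    # a real effect would modify `samples` here,
    # then re-encode to bytes for the playback PCM
    samples = np.frombuffer(data, dtype='<i2')
    return samples.tobytes()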
def run_timing():
    """Runs a timing analysis of the FFT algorithm"""
    try:
        screwdriver = Transformer('6A(142).BMP')  # 320x240 = 76800 p
        vase = Transformer('vase.jpg')  # 320x400 = 128000 p
        city = Transformer('DUSS.BMP')  # 672x473 = 317856 p
        round_tr = Transformer('round.jpg')  # 800x800 = 640000 p
        dew = Transformer('jankaluza_dew_drop.jpg')  # 3840x2562 = 9838080 p
        fire = Transformer('vovalente_fire.jpg')  # 5077x3385 = 17185645 p
    except FileNotFoundError as er:
        print(er)
        sys.exit()

    with open('timing-fft.txt', 'w') as out:
        out.write('# number TIME\n')
    transformers = [screwdriver, vase, city, round_tr, dew, fire]
    sizes = [76800, 128000, 317856, 640000, 9838080, 17185645]
    for size, transformer in zip(sizes, transformers):
        start = time.time()
        transformer.transform()
        end = time.time()
        with open('timing-fft.txt', 'a') as log:
            log.write(str(size) + ' ' + str(end - start) + '\n')
    transformers.clear()
    gc.collect()
    print('Timing completed successfully')
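A hedged follow-up sketch for the timing file this writes: if the implementation scales as O(N log N), the time divided by N·log N should stay roughly constant across image sizes (the file name and column layout are taken from the code above; '#' header lines are skipped by loadtxt by default):

import numpy as np

sizes, times = np.loadtxt('timing-fft.txt', unpack=True)
ratio = times / (sizes * np.log2(sizes))
print(ratio)  # roughly flat for O(N log N) scaling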
class TestTransformer(TestCase):

    def setUp(self):
        self.transformer = Transformer()
        self.project_dir = os.path.abspath(__file__ + "/../../")

    def tearDown(self):
        for f in glob.glob(self.project_dir + "/resources/*.parquet.gzip"):
            os.remove(f)

    def test___init__(self):
        result = self.transformer.find_hottest_day()
        print(result)
        self.assertEqual(1, len(result.index))
        self.assertEqual('2016-03-17T00:00:00', str(result['ObservationDate'].iloc[0]))
        self.assertEqual(15.8, float(result['ScreenTemperature'].iloc[0]))
        self.assertEqual('Highland & Eilean Siar', str(result['Region'].iloc[0]))
class PositionAdapter:

    def __init__(self, tip_names=[
            "thumbTip_IK",
            "indexTip_IK",
            "middleTip_IK",
            "ringTip_IK",
            "littleTip_IK",
    ], arm_name="arm", hand_name="hand"):
        ctrl = bge.logic.getCurrentController()
        self._ob = ctrl.owner
        self._finger_channels = [self._ob.channels[tip] for tip in tip_names]
        self._arm_channels = self._ob.channels[arm_name]
        self._hand_name = hand_name
        self._hand_channel = self._ob.channels[hand_name]
        self._transformer = Transformer()

    def translate(self, matrix):
        # TODO: query the arm length instead of using a fixed value
        # matrix[5][1] -= 2.41533  # offset the arm position by the arm length
        print("update 0", self._finger_channels[0].location)
        # translate arm
        self._transformer.translate(self._arm_channels, self._ob, matrix[-2])
        # rotate hand
        rotation = matrix[-1]
        self._transformer.rotate(self._hand_channel, rotation)
        # update
        self._ob.update()
        print('f1', self._finger_channels[0].location)
        print('f1', self._finger_channels[0].rotation_quaternion)
        # translate fingers
        for i in range(len(matrix) - 2):
            loc = matrix[i]
            self._transformer.translate(self._finger_channels[i], self._ob, loc)
        # update
        self._ob.update()
        print('f2', self._finger_channels[0].location)
def _transform_dataset_test(self, isLocal: bool, dataset_name: str,
                            output_format: str):
    paths = list(get_paths(isLocal, dataset_name, output_format))
    transform = Transformer(self._spark, paths[0])
    paths.pop(0)
    if dataset_name == 'country':
        temperature_input_path, country_dict_input_path, output_path = paths
        data = transform.transform_country(country_dict_input_path,
                                           temperature_input_path, output_path)
    elif dataset_name == 'airport':
        input_path, output_path = paths
        data = transform.transform_airports(input_path, output_path)
    elif dataset_name == 'us_state':
        demographics_input_path, us_state_dict_input_path, output_path = paths
        data = transform.transform_us_state(us_state_dict_input_path,
                                            demographics_input_path, output_path)
    elif dataset_name == 'immigration':
        input_path, date_output_path, img_output_path = paths
        data = transform.transform_immigration(input_path, img_output_path,
                                               date_output_path)
    else:
        # guard against an unbound `data` for unexpected dataset names
        self.fail('unknown dataset: %s' % dataset_name)
    self.assertIsNotNone(data)
params['url'] = data['url']
load = Loader()
load.setParams(params)
data = load.loadHtml().data

# ------------------ #
# Transform the data #
# ------------------ #
# setting parameters
jsonFp = './states.json'
colName = 'State:'  # column name used to filter the data

# instantiate the transformer
transform = Transformer(data)
transform.setTransformDictionary(jsonFp)

# get the long names of the states
statesDict = transform.flipTransformDictionary()
states = statesDict.keys()

# filter the data
transform.filterData(colName, states)
statesAbbr = [statesDict[state] for state in transform.data[colName]]
transform.addCol(1, 'StateAbbr', statesAbbr)

# rename column values
transform.renameCol(0, 'State')
# set index
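flipTransformDictionary above evidently inverts the mapping loaded from states.json so that the long state names become the keys. A hedged standalone sketch of that inversion (the sample entries are hypothetical):

abbr_to_name = {'CA': 'California', 'TX': 'Texas'}  # hypothetical states.json content
name_to_abbr = {name: abbr for abbr, name in abbr_to_name.items()}
print(name_to_abbr['California'])  # -> 'CA'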
def train(net, train_set, valid_set, train_params, logger=None, prefix=''):
    # unpack arguments
    x_train, y_train = train_set
    x_valid, y_valid = valid_set
    # convert to bc01 order; the train set will be converted in Transformer
    # x_valid = np.rollaxis(x_valid, 3, 1)
    x_valid = x_valid[:, np.newaxis, ...]

    BATCH_SIZE = train_params['batch_size']
    IMAGE_SIZE = train_params['image_size']
    MOMENTUM = train_params['momentum']
    MAX_EPOCH = train_params['epochs']
    LEARNING_RATE_SCHEDULE = train_params['lr_schedule']
    L2 = train_params.get('L2', 0.)
    output = net['output']

    print("Starting dataset loader...")
    queue = Queue(5)
    transform = Transformer(x_train, y_train, queue, batch_size=BATCH_SIZE)
    transform.start()

    # allocate symbolic variables for theano graph computations
    batch_index = T.iscalar('batch_index')
    X_batch = T.tensor4('x')
    y_batch = T.fmatrix('y')

    # allocate shared variables for images, labels and learning rate
    x_shared = theano.shared(np.zeros((BATCH_SIZE, 1, IMAGE_SIZE, IMAGE_SIZE),
                                      dtype=theano.config.floatX),
                             borrow=True)
    y_shared = theano.shared(np.zeros((BATCH_SIZE, 2),
                                      dtype=theano.config.floatX),
                             borrow=True)
    learning_rate = theano.shared(np.float32(LEARNING_RATE_SCHEDULE[0]))

    out_train = lasagne.layers.get_output(output, X_batch, deterministic=False)
    out_val = lasagne.layers.get_output(output, X_batch, deterministic=True)

    loss_train = T.mean(lasagne.objectives.squared_error(out_train, y_batch))  # + L2 * regularize_network_params(output, l2)
    loss_val = T.mean(lasagne.objectives.squared_error(out_val, y_batch))  # + L2 * regularize_network_params(output, l2)

    # collect all model parameters
    all_params = lasagne.layers.get_all_params(output)
    # generate parameter updates for SGD with Nesterov momentum
    updates = lasagne.updates.nesterov_momentum(
        loss_train, all_params, learning_rate, MOMENTUM)

    logger.info("Compiling theano functions...")
    # create theano functions for calculating losses on train and validation sets
    iter_train = theano.function(
        [],
        [loss_train],
        updates=updates,
        givens={
            X_batch: x_shared,  # [batch_index * BATCH_SIZE: (batch_index + 1) * BATCH_SIZE],
            y_batch: y_shared,  # [batch_index * BATCH_SIZE: (batch_index + 1) * BATCH_SIZE],
        },
    )
    iter_valid = theano.function(
        [],
        [loss_val, out_val],
        givens={
            X_batch: x_shared,
            y_batch: y_shared,
        },
    )

    ###################
    # Actual training #
    ###################
    n_train_batches = x_train.shape[0] // BATCH_SIZE
    n_val_batches = x_valid.shape[0] // BATCH_SIZE

    # keep track of the network's best performance and save the net configuration
    best_epoch = 0
    best_valid = 1.
    best_auc = 0.
    # epoch and iteration counters
    epoch = 0
    _iter = 0
    # wait for at least this many epochs before saving the model
    min_epochs = 0
    # store these values for plotting the learning curves
    train_loss = []
    valid_loss = []
    aucs = []
    # wait this many epochs while the validation error is not improving
    patience = 10
    now = time.time()
    logger.info("| Epoch | Train err | Validation err | ROC AUC | Ratio | Time |")
    logger.info("|---------------------------------------------------------------|")

    try:
        # get the next chunks of data
        while epoch < MAX_EPOCH:
            if epoch in LEARNING_RATE_SCHEDULE:
                learning_rate.set_value(LEARNING_RATE_SCHEDULE[epoch])
            epoch += 1
            x_next, y_next = queue.get()
            losses = []
            while x_next is not None:
                x_shared.set_value(x_next, borrow=True)
                y_shared.set_value(y_next, borrow=True)
                l = iter_train()
                losses.append(l)
                x_next, y_next = queue.get()
            avg_train_loss = np.mean(losses)

            # average the predictions across 5 patches: corners and center
            losses = []
            for idx in range(n_val_batches - 1):
                x_shared.set_value(x_valid[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE])
                y_shared.set_value(y_valid[idx * BATCH_SIZE:(idx + 1) * BATCH_SIZE])
                vloss, out_val = iter_valid()
                losses.append(vloss)
            avg_valid_loss = np.mean(losses)

            logger.info("|%6d | %9.6f | %14.6f | %7.5f | %1.3f | %6d |" %
                        (epoch, avg_train_loss, avg_valid_loss, 0,
                         avg_valid_loss / avg_train_loss, time.time() - now))
            # keep track of these for future analysis
            train_loss.append(avg_train_loss)
            valid_loss.append(avg_valid_loss)

            # if this is the best kappa obtained so far,
            # save the model to make predictions on the test set
            # if auc > best_auc:
            #     # always wait for min_epochs, to avoid frequent saving
            #     # during early stages of learning
            #     if epoch >= min_epochs:
            #         save_network(net, filename=os.path.join(prefix, 'net.pickle'))
            #         np.save(os.path.join(prefix, "val_predictions.npy"), valid_probas)
            #         valid_features = feats / 5
            #         np.save(os.path.join(prefix, "val_features.npy"), valid_features)
            #     best_auc = auc
            #     best_epoch = epoch
            #     patience = 10
    except KeyboardInterrupt:
        logger.info("Training interrupted on epoch %d" % epoch)

    elapsed_time = time.time() - now
    logger.info("The best auc: %.5f obtained on epoch %d.\n The training took %d seconds." %
                (best_auc, best_epoch, elapsed_time))
    logger.info(" The average performance was %.1f images/sec" %
                ((len(x_train) + len(y_train)) * float(epoch) / elapsed_time))
    results = np.array([train_loss, valid_loss, aucs], dtype=float)
    np.save(os.path.join(prefix, "training.npy"), results)
    transform.terminate()
    transform.join()
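The inner loop above relies on the Transformer process pushing batches into the queue and a None sentinel marking the end of each epoch. A minimal, hedged sketch of that producer/consumer pattern in isolation (the batch data is fabricated):

from multiprocessing import Process, Queue

def producer(q):
    for batch in ([1, 2], [3, 4]):  # stand-ins for (x, y) training chunks
        q.put((batch, batch))
    q.put((None, None))             # sentinel: no more batches this epoch

if __name__ == '__main__':
    q = Queue(5)
    Process(target=producer, args=(q,)).start()
    x_next, y_next = q.get()
    while x_next is not None:       # consume until the sentinel arrives
        print('train on', x_next)
        x_next, y_next = q.get()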
from transform import Transformer

if __name__ == '__main__':
    shapes = ['round.jpg', 'square.png', 'triangle.png']
    for sh in shapes:
        trans = Transformer(sh)
        trans.transform()
        trans.plot_fft('FT of')
        trans.shift()
        trans.plot_fft('Shifted FT of')
    Transformer.show_all()

    tr = Transformer('vase.jpg')
    tr2 = Transformer('face.jpg')
    tr.plot('Original vase')
    tr2.plot('Original face')
    tr.transform()
    tr.plot_fft('FT')
    tr2.transform()
    tr2.plot_fft('FT')
    tr.shift()
    tr.plot_fft('Shifted FT')
    tr2.shift()
    tr2.plot_fft('Shifted FT')
    Transformer.show_all()
        super().inverse()
        self.plot('Low pass filtered image', 'gray', False)

    def high_pass_filter(self):
        """This method applies the high pass filter to the image"""
        super().transform()
        # zero the four corner blocks of the unshifted spectrum,
        # where the low frequencies live
        self._data[0:int(self.r * self.fraction),
                   0:int(self.c * self.fraction)] = 0
        self._data[int(self.r * (1 - self.fraction)):self.r,
                   0:int(self.c * self.fraction)] = 0
        self._data[0:int(self.r * self.fraction),
                   int(self.c * (1 - self.fraction)):self.c] = 0
        self._data[int(self.r * (1 - self.fraction)):self.r,
                   int(self.c * (1 - self.fraction)):self.c] = 0
        self._plotting = np.abs(self._data)
        self.plot_fft('High pass filtered spectrum')
        self.shift()
        self.plot_fft('High pass filtered spectrum with shift')
        super().inverse()
        self.plot('High pass filtered image', 'gray', False)


if __name__ == '__main__':
    fl1 = Filter('6A(142).BMP', 0.07)
    fl1.plot('6A(142).BMP')
    fl1.low_pass_filter()
    fl2 = Filter('6A(142).BMP', 0.07)
    fl2.high_pass_filter()
    Transformer.show_all()
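For symmetry, the low-pass step (whose tail appears at the top of this snippet) would keep exactly the corner blocks that the high-pass zeroes. A hedged standalone sketch in plain numpy (the array and fraction are made up; the mask is built for an unshifted spectrum):

import numpy as np

F = np.fft.fft2(np.random.rand(8, 8))  # stand-in spectrum
r, c, fraction = F.shape[0], F.shape[1], 0.25
keep = np.zeros_like(F, dtype=bool)
# the four corners of the unshifted FFT hold the low frequencies
keep[:int(r * fraction), :int(c * fraction)] = True
keep[int(r * (1 - fraction)):, :int(c * fraction)] = True
keep[:int(r * fraction), int(c * (1 - fraction)):] = True
keep[int(r * (1 - fraction)):, int(c * (1 - fraction)):] = True
low_passed = np.where(keep, F, 0)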
target_dict_list = final_dict_list if full_soql_query_mode else envdata
opps = target_dict_list[0]['records']
service_orders = target_dict_list[2]['records']
quotes = target_dict_list[5]['records']
cor_forms = target_dict_list[6]['records']
cap_projects = target_dict_list[3]['records']
expense_builders = target_dict_list[4]['records']
npv_tasks = target_dict_list[1]['records']
print('All data successfully queried. Any errors after this point are due to DATA VALIDATION ONLY.')

t = Transformer(opps, service_orders, quotes, cor_forms, cap_projects,
                expense_builders)
valid_opp_to_service_orders = t.validate_opp_to_service_order()
valid_opp_to_quote_or_cor_form = t.validate_opp_to_quote_or_cor_form(
    valid_opp_to_service_orders)
standardized_opp_to_cp_or_eb = t.standardize_opp_to_cp_or_eb(
    valid_opp_to_quote_or_cor_form)
valid_opp_to_cp_or_eb = t.validate_opp_to_cp_or_eb(
    valid_opp_to_quote_or_cor_form, standardized_opp_to_cp_or_eb)
# print(valid_opp_to_cp_or_eb)

## All validation stages passed; the applicable NPV tasks can now be closed:
l = Loader(valid_opp_to_cp_or_eb, npv_tasks) if full_soql_query_mode else None
tasks_closed = l.load_tasks() if full_soql_query_mode else []
if len(tasks_closed) == 0:
    print('0 NPV tasks validated by automation.')
elif len(tasks_closed) > 0:
from transform import Transformer
import time
import sys

if __name__ == '__main__':
    try:
        screwdriver = Transformer('6A(142).BMP')  # 320x240 = 76800 p
        vase = Transformer('vase.jpg')  # 320x400 = 128000 p
        city = Transformer('DUSS.BMP')  # 672x473 = 317856 p
        round_tr = Transformer('round.jpg')  # 800x800 = 640000 p
        dew = Transformer('jankaluza_dew_drop.jpg')  # 3840x2562 = 9838080 p
        fire = Transformer('vovalente_fire.jpg')  # 5077x3385 = 17185645 p
    except FileNotFoundError as er:
        print(er)
        sys.exit()

    with open('timing-fft.txt', 'w') as out:
        out.write('# number TIME\n')
    transformers = [screwdriver, vase, city, round_tr, dew, fire]
    sizes = [76800, 128000, 317856, 640000, 9838080, 17185645]
    for size, transformer in zip(sizes, transformers):
        start = time.time()
        transformer.transform()
        end = time.time()
        with open('timing-fft.txt', 'a') as log:
            log.write(str(size) + ' ' + str(end - start) + '\n')
def run_comp():
    """Demonstrates the FT images of complex forms"""
    tr = Transformer('vase.jpg')
    tr2 = Transformer('face.jpg')
    tr.plot('Original vase')
    tr2.plot('Original face')
    tr.transform()
    tr.plot_fft('FT')
    tr2.transform()
    tr2.plot_fft('FT')
    tr.shift()
    tr.plot_fft('Shifted FT')
    tr2.shift()
    tr2.plot_fft('Shifted FT')
    Transformer.show_all()
    del tr, tr2
    gc.collect()
from log import logger
from combine import Combiner
from transform import Transformer
from core import SoxError
from core import SoxiError
from version import version as __version__
import os

# create transformer
tfm = Transformer()
# trim the audio between 5 and 10.5 seconds
tfm.trim(5, 10.5)
# apply compression
tfm.compand()
# apply a fade in and fade out
tfm.fade(fade_in_len=1.0, fade_out_len=0.5)
# create the output file
tfm.build('./input/audio.wav', './output/audio.aiff')
# see the applied effects
tfm.effects_log

# # create combiner
# cbn = Combiner()
# # pitch shift combined audio up 3 semitones
# cbn.pitch(3.0)
# # convert output to 8000 Hz stereo
# cbn.convert(samplerate=8000)
# # create the output file
# cbn.build(
#     ['input1.wav', 'input2.wav', 'input3.wav'], 'output.wav', 'concatenate'
# )
def ingest(hdfsfile, file_no, datafolders):
    """
    :type hdfsfile: str
    :type file_no: int
    """
    try:
        process_info = dict()
        process_info['process_start_timestamp'] = datetime.utcnow().strftime(
            '%Y-%m-%d %H:%M:%S')
        process_info['user_name'] = getpass.getuser()
        process_info['file_name'] = os.path.basename(hdfsfile)
        process_info['file_no'] = file_no
        # spark = HiveContext(spark.sparkContext)
        file_config = getFileConfig(hdfsfile, config)
        if not file_config:
            err_msg = "cannot find a matching file configuration for file %s!" % hdfsfile
            logger.error(err_msg)
            return 'Error - ' + err_msg
        metadata = Metadata(sc)
        if hdfsutil.checkDuplicate(config, file_config, hdfsfile):
            hdfsutil.move_to_error_archive(config, file_config, hdfsfile,
                                           process_info)
            metadata.log_error_table(spark, file_config, config, process_info,
                                     "duplicate file")
            return 'Error - a file with the same name already exists in the archive folder!' + \
                ' Moved to error_archive "' + process_info['error_archive_path'] + '".'
        metadata.loadKVFile(spark, config, file_config.get('kv_file'), process_info)

        reader = Reader(spark)
        df = reader.read(hdfsfile, file_config, metadata, process_info)
        process_info['row_count'] = df.count()
        logger.info("row count in file %s is %d" %
                    (hdfsfile, process_info['row_count']))
        if process_info['row_count'] == 0:
            if file_config.get('empty_check', "no").lower() == "yes":
                hdfsutil.move_to_error_archive(config, file_config, hdfsfile,
                                               process_info)
                metadata.log_error_table(spark, file_config, config, process_info,
                                         "file is empty")
                return 'Error - Empty File!' + \
                    ' Moved to error_archive "' + process_info['error_archive_path'] + '".'
            else:
                hdfsutil.move_to_archive(config, file_config, hdfsfile)
                logger.warn('%s is empty!' % hdfsfile)
                return 'Success'

        validator = Validator(sc)
        validator.val_column_num(df.columns, metadata.data_types, process_info)
        val_error = validator.get_error()
        if val_error:
            hdfsutil.move_to_error_archive(config, file_config, hdfsfile,
                                           process_info)
            metadata.log_error_table(spark, file_config, config, process_info,
                                     val_error)
            return 'Error - ' + val_error + \
                ' Moved to error_archive "' + process_info['error_archive_path'] + '".'

        transformer = Transformer(sc)
        df = transformer.trans_data_types(spark, df, file_config, metadata,
                                          process_info)
        df = validator.val_data_types(spark, df, file_config, metadata,
                                      process_info)
        transformedColumns = [col for col in df.columns if col[:2] == '__']
        writer = Writer(sc)
        val_error = validator.get_error()
        if val_error:
            error_df = df.where('length(_error_message) > 0').drop(*transformedColumns)
            writer.write_errorfile(error_df, config, file_config, process_info)
            df.unpersist()
            logger.error('file %s failed at data type validation' % hdfsfile)
            hdfsutil.move_to_error_archive(config, file_config, hdfsfile,
                                           process_info)
            metadata.log_error_table(spark, file_config, config, process_info,
                                     val_error)
            return 'Error - ' + val_error + \
                ' Moved to error_archive "' + process_info['error_archive_path'] + '".' + \
                ' Error file path "' + process_info['error_file_path'] + '".'
        else:
            orig_columns = [col[1:] for col in transformedColumns]
            data_df = df.drop(*orig_columns) \
                .drop('_error_message') \
                .withColumn('source_filename', lit(os.path.basename(hdfsfile))) \
                .withColumn('process_timestamp',
                            to_timestamp(lit(process_info['process_start_timestamp']),
                                         'yyyy-MM-dd HH:mm:ss'))
            writer.write_orc(data_df, spark, config, metadata, process_info)
            datafolders.add(process_info['hdfs_datafile_path'])
            df.unpersist()
            hdfsutil.move_to_archive(config, file_config, hdfsfile)
            process_info['process_end_timestamp'] = datetime.utcnow().strftime(
                '%Y-%m-%d %H:%M:%S')
            metadata.log_registry_table(spark, file_config, config, process_info)
            logger.info('file %s has been successfully ingested' % hdfsfile)
            return 'Success'
    except Exception as e:
        logger.error('file %s ingestion failed! exception is %s' %
                     (hdfsfile, str(e)))
        return 'Failed - Exception happened! Please see Yarn log for details'
import sys
sys.path.append("/home/app/code/")

from pyspark.sql import SparkSession
from pyspark.sql import SQLContext

from extract import Extract
from transform import Transformer
from load import Load

if __name__ == '__main__':
    spark = SparkSession \
        .builder \
        .appName("Covid App") \
        .config("spark.some.config.option", "some-value") \
        .getOrCreate()
    sqlContext = SQLContext(spark)

    df = Extract(spark)
    df = df.extract_covid_data()

    transformer = Transformer(df, sqlContext)
    transformer.data_types_transformations()
    transformed_df = transformer.dimensions_transfomations()
    transformed_df = transformer.fill_na(transformed_df)

    loader = Load(transformed_df)
    loader.load_data()
class Actor:

    def __init__(self, args):
        self.estimator = Estimator(emb_dim=args.emb_dim, n_hidden=args.n_hidden,
                                   bidirectional=args.bi, n_layer=args.n_layer,
                                   dropout=args.dropout, lr=args.lr,
                                   decay=args.decay, lr_p=args.lr_p,
                                   clip=args.clip, batch_size=args.batch,
                                   epoch_num=args.epoch_num, cuda=args.cuda,
                                   path=args.path)
        self.transformer = Transformer(prolog_grammar.GRAMMAR_DICTIONARY,
                                       prolog_grammar.ROOT_RULE)
        self.performances = []
        self.actions = []
        self.path = args.path

    def search(self):
        self.perform('initial')
        # exit(0)
        for i in range(25):
            print(i)
            try:
                self.step()
                self.perform(i)
            except BaseException as e:
                print(e)
                print(self.actions)
                print(self.performances)
                with open('gra.pkl', 'wb') as f:
                    pickle.dump(self.transformer.get_grammar_dict(), f)
                exit(-1)
        print(self.performances)
        # exit(0)

    def step(self):
        import time
        t1 = time.time()
        action_space = self.transformer.get_act_space()
        t2 = time.time()
        method = []
        i = -1
        # draw a random action type until one with a non-empty action list is found
        while len(method) == 0:
            i = random.randint(0, 3)
            method = action_space[i]
        action = random.choice(method)
        print(i, action)
        if i == 0:
            self.transformer.creat_nt(action)
        elif i == 1:
            self.transformer.merge_nt(action)
        elif i == 2:
            self.transformer.combine_nt(*action)
        else:
            assert i == 3
            self.transformer.delete_prod(action)
        self.actions.append((i, action))

    def perform(self, name):
        grammar_dict, root_rule = self.transformer.get_grammar_dict()
        with open(os.path.join(self.path, f'grammar-{name}'), 'wb') as f:
            pickle.dump(self.transformer, f)
        perform = self.estimator.estimate(grammar_dict, root_rule,
                                          toy=False, name=repr(name))
        self.performances.append(perform)
        print(perform)
        return perform

    def exp(self, name):
        for _ in range(100):
            self.step()
        self.perform(name)

    def one(self):
        # with open(path, 'rb') as f:
        #     self.transformer = pickle.load(f)
        for i in range(50):
            self.step()
        grammar_dict, root_rule = self.transformer.get_grammar_dict()
        for i in range(10000):
            perform = self.estimator.estimate(grammar_dict, root_rule,
                                              toy=False, name='tmp')
            print(perform)