def __init__(self, config):
    """
    Args:
        config: config file of the current model
    """
    self.config = config
    # load data here
    d = DataLoader(self.config)
    # Get the filenames and labels
    self.filenames, self.labels = d.get_sub_dataset(self.config.image_size)
    # Create the Dataset using Tensorflow Data API
    self.dataset = tf.data.Dataset.from_tensor_slices(self.filenames)
    # Apply parse function to get the numpy array of the images
    self.dataset = self.dataset.map(
        map_func=self._parse_function,
        num_parallel_calls=self.config.num_parallel_calls)
    # Shuffle the dataset
    #self.dataset = self.dataset.shuffle(self.config.buffer_size)
    # Repeat the dataset for num_epochs epochs
    self.dataset = self.dataset.repeat(self.config.num_epochs)
    # Prefetch 10 * batch_size elements to increase pipeline throughput
    self.dataset = self.dataset.prefetch(buffer_size=10 * config.batch_size)
    # Apply batching
    self.dataset = self.dataset.batch(config.batch_size)
    self.iterator = self.dataset.make_initializable_iterator()
    self.image = self.iterator.get_next()
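# --- Hedged usage sketch (added; not part of the original snippet) ---
# Shows how the initializable iterator built above would typically be driven.
# Assumptions: the __init__ belongs to a data-generator class, here given the
# hypothetical name DataGenerator, and TF1-style graph execution is used
# (make_initializable_iterator is a TF1 API).
import tensorflow as tf

def consume_one_batch(config):
    data = DataGenerator(config)  # hypothetical wrapper class around the __init__ above
    with tf.Session() as sess:
        sess.run(data.iterator.initializer)
        images = sess.run(data.image)  # one parsed, batched tensor of images
    return images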
def handle(self, *args, **options):
    url = settings.SHAREPOINT_URL
    username = settings.SHAREPOINT_USERNAME
    password = settings.SHAREPOINT_PASSWORD
    documents = settings.DOCUMENTS_URL
    loader = DataLoader(url, username, password, documents)
    print("Caricamento file excel")  # "Loading excel file"
    (nuovi, aggiornati) = loader.load_remote_into_db()
    print(
        f'Ho inserito {nuovi} nuovi iscritti e aggiornato gli altri {aggiornati}'
    )  # "Inserted {nuovi} new members and updated the other {aggiornati}"
def aggiorna_lista(self, s: list, t_user: str, t_chat: dict) -> JsonResponse:
    if self.check_admin(t_user, t_chat["id"]):
        url = settings.SHAREPOINT_URL
        username = settings.SHAREPOINT_USERNAME
        password = settings.SHAREPOINT_PASSWORD
        documents = settings.DOCUMENTS_URL
        loader = DataLoader(url, username, password, documents)
        send_message('Sto leggendo il file excel remoto', t_chat["id"])  # "Reading the remote excel file"
        (nuovi, aggiornati) = loader.loadRemoteIntoDb()
        send_message(
            f'Ho inserito {nuovi} nuovi iscritti e aggiornato gli altri {aggiornati}',
            t_chat["id"])  # "Inserted {nuovi} new members and updated the other {aggiornati}"
    return JsonResponse({"ok": "POST request processed"})
def data_generation(labels_dir_path, img_dir_path, batch_size=4, inputshape=(224, 224, 3)):
    # "sometimes" is not defined in this excerpt; the conventional imgaug helper
    # (an assumption here) applies the wrapped augmenter with 50% probability.
    sometimes = lambda aug: iaa.Sometimes(0.5, aug)

    aug = iaa.Sequential([
        iaa.Crop(px=(0, 40)),  # crop images from each side by 0 to 40px (randomly chosen)
        iaa.Fliplr(0.5),  # horizontally flip 50% of the images
        sometimes(iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},  # scale images to 80-120% of their size, individually per axis
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # translate by -20 to +20 percent (per axis)
            rotate=(-45, 45),  # rotate by -45 to +45 degrees
        )),
    ], random_order=True)

    dataLoaderRGB = DataLoader(batch_size=batch_size,
                               img_dir_path=img_dir_path,
                               labels_dir_path=labels_dir_path,
                               input_shape=inputshape,
                               aug=aug)
    return dataLoaderRGB.generator()
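# --- Hedged usage sketch (added; not part of the original snippet) ---
# The directory paths below are placeholders, and the sketch assumes the
# generator yields (image_batch, label_batch) tuples suitable for Keras fit().
gen = data_generation(labels_dir_path="path/to/labels",
                      img_dir_path="path/to/images",
                      batch_size=4)
x_batch, y_batch = next(gen)
print(x_batch.shape, y_batch.shape)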
def __init__(self):
    # self.config = config
    # load data here
    d = DataLoader("./data")
    # Get the filenames and labels
    self.filenames, self.labels = d.get_sub_dataset(28)
    #assert len(self.filenames) == len(self.labels)
    # Create the Dataset using Tensorflow Data API
    self.dataset = tf.data.Dataset.from_tensor_slices(self.filenames)
    # Apply parse function to get the numpy array of the images
    self.dataset = self.dataset.map(self._parse_function)
    # Shuffle the dataset
    self.dataset = self.dataset.shuffle(self.filenames.get_shape()[0])
    # Apply batching
    self.dataset = self.dataset.batch(50)
def plot_sentiment_against_utd_results(
        sentiment_per_week: List[float]) -> None:
    utd_results = DataLoader.load_utd_results()
    max_sent = max(sentiment_per_week)
    aligned_sentiment = [s / max_sent for s in sentiment_per_week]
    Plotter.dual_line_plot(aligned_sentiment,
                           utd_results,
                           labels=["sentiment", "ppw"])
def filter_on_language(
        dataset: List[TimestampedText]) -> List[TimestampedText]:
    eng_stopwords = DataLoader.load_stopwords_file()

    def is_english(doc: TimestampedText) -> bool:
        pass

    eng_content = [d for d in dataset if is_english(d)]
    return eng_content
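# --- Hedged sketch (added; not part of the original snippet) ---
# The original leaves is_english as a stub. One plausible implementation,
# assuming TimestampedText exposes a .text attribute and eng_stopwords is a set
# of English stopwords, is a simple stopword-ratio heuristic:
def is_english_sketch(doc: TimestampedText, eng_stopwords: set, threshold: float = 0.1) -> bool:
    tokens = doc.text.lower().split()  # .text is an assumption about TimestampedText
    if not tokens:
        return False
    hits = sum(1 for t in tokens if t in eng_stopwords)
    return hits / len(tokens) >= threshold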
class EnvMaker:
    def __init__(self) -> None:
        self.dl = DataLoader()
        self.train, self.test = pd.DataFrame(), pd.DataFrame()

    def make_dummy_env(self, dataset, env_args):
        env = gym.make("crypt-v001", df=dataset, **env_args)
        check_env(env)
        env = DummyVecEnv([lambda: env])
        env = VecCheckNan(env, raise_exception=True)
        env = VecNormalize(
            env, norm_obs=True, norm_reward=False, clip_obs=10.0, gamma=0.95
        )
        return env

    def make_dummy_train_test_env(self, env_args):
        if self.train.empty or self.test.empty:
            self.train, self.test = self.dl.get_train_test_dataset()
        train_env = self.make_dummy_env(dataset=self.train, env_args=env_args)
        test_env = self.make_dummy_env(dataset=self.test, env_args=env_args)
        return train_env, test_env

    def make_vec_env(self, dataset, env_args):
        env_args["df"] = dataset
        env = make_vec_env('crypt-v001', env_kwargs=env_args)
        env = VecCheckNan(env, raise_exception=True)
        env = VecNormalize(
            env, norm_obs=True, norm_reward=False, clip_obs=10.0, gamma=0.95
        )
        return env

    def make_vec_train_test_env(self, env_args):
        if self.train.empty or self.test.empty:
            self.train, self.test = self.dl.get_train_test_dataset()
        train_env = self.make_vec_env(dataset=self.train, env_args=env_args)
        test_env = self.make_vec_env(dataset=self.test, env_args=env_args)
        return train_env, test_env
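# --- Hedged usage sketch (added; not part of the original snippet) ---
# Example of driving EnvMaker; the env_args keys shown here (e.g. window_size)
# are placeholders and not taken from the original code.
maker = EnvMaker()
train_env, test_env = maker.make_dummy_train_test_env(env_args={"window_size": 10})
obs = train_env.reset()  # normalized observation from the wrapped VecEnv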
def run_pipeline() -> List[TimestampedText]:
    dataset = DataLoader.load_dataset_from_file()
    print(f"Loaded dataset with {len(dataset)} entries")
    no_junk_in_my_trunk = remove_junk_content(dataset)
    print(
        f"Junk removal removed {len(dataset) - len(no_junk_in_my_trunk)}. {len(no_junk_in_my_trunk)} remaining."
    )
    only_english_entries = filter_on_language(no_junk_in_my_trunk)
    print(
        f"Language filter removed {len(no_junk_in_my_trunk) - len(only_english_entries)}. {len(only_english_entries)} remaining."
    )
    only_relevant = relevant_sentences(only_english_entries)
    print(len(only_relevant), "sentences selected.")
    return only_relevant
def main():
    loader = DataLoader(source_dir=SOURCE_DIR,
                        source_ext=SOURCE_EXT,
                        solution_dir=SOLUTION_DIR,
                        solution_ext=SOLUTION_EXT)
    n, flow_matrix, distance_matrix = loader.load_source(FILES[FILE_INDEX])
    optimal_sol = loader.load_results(FILES[FILE_INDEX])[1]
    results = genetic_solver(n, flow_matrix, distance_matrix)
    # optimal_sol = brute_force_solver(n, flow_matrix, distance_matrix)[0]
    random_res = random_solver(n, flow_matrix, distance_matrix, TIMES_RANDOM)[0]
    greedy_res = greedy_solver(n, flow_matrix, distance_matrix)
    save_results_to_csv(results)
    graph_filename = 'graph'
    graph_path = os.path.join(RESULTS_DIR, graph_filename + '.png')
    plot_graph(results,
               path=graph_path,
               random_res=random_res,
               greedy_res=greedy_res,
               optimal_sol=optimal_sol)
def run(args):
    loader = DataLoader()
    logger.info("Loading data Gram matrices ...")
    X0, Xte0, X1, Xte1, X2, Xte2 = get_kernels(args, loader)
    ytr0, ytr1, ytr2 = get_labels(loader)

    logger.info("Prediction on dataset 0 ...")
    ypred0 = predict(X0, ytr0, 1.6e-3, Xte0)
    logger.info("Prediction on dataset 1 ...")
    ypred1 = predict(X1, ytr1, 1.1e-3, Xte1)
    logger.info("Prediction on dataset 2 ...")
    ypred2 = predict(X2, ytr2, 8.1895e-4, Xte2)

    list_preds = []
    for y_pred_test in [ypred0, ypred1, ypred2]:
        # map the -1/+1 SVM labels back to 0/1 before writing the submission
        y_pred_test[y_pred_test == -1] = 0
        y_pred_test = y_pred_test.astype(int)
        list_preds += y_pred_test.tolist()

    with open("Yte.csv", 'w') as f:
        f.write('Id,Bound\n')
        for i in range(len(list_preds)):
            f.write(str(i) + ',' + str(list_preds[i]) + '\n')
    logger.info("Results saved in Yte.csv!")
from ModelBaseClass import ModelBaseClass
from utils.DataLoader import DataLoader


class FixedDoseBaseline(ModelBaseClass):
    def __init__(self, data_loader):
        super(FixedDoseBaseline, self).__init__(data_loader)

    def next_action(self, patient):
        # always prescribe the fixed medium dose (MED_DOSE is assumed to be
        # defined on ModelBaseClass)
        return self.MED_DOSE


if __name__ == '__main__':
    baseline = FixedDoseBaseline(DataLoader("data/warfarin_clean.csv"))
    cum_regret, avg_regret = baseline.evaluate_online()
    print("cum_regret {}, avg_regret {}".format(cum_regret, avg_regret))
class TestSVM(unittest.TestCase):
    def setUp(self) -> None:
        physical_devices = tf.config.list_physical_devices('GPU')
        if physical_devices is not None and len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
        self.data_loader = DataLoader()

    def gen_dataset(self, x, y, batch_size):
        x, y = tf.cast(x, dtype=tf.float32), tf.reshape(tf.cast(y, dtype=tf.float32), shape=(-1, 1))
        return tf.data.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size, drop_remainder=True)

    def test_linear(self):
        (x_train, y_train), (x_test, y_test) = self.data_loader.loadIris1(0.8)
        svm = LinearSVM(num_feature=2)
        svm.compile(optimizer=tf.optimizers.SGD(0.01), loss=svm.loss, metrics=[svm.accu])
        svm.fit(x_train, y_train, batch_size=64, epochs=400, verbose=0)
        results = svm.evaluate(x_test, y_test)
        print("test result: ", results, svm.params())
        self.assertGreater(results[1], 0.9)

        a = float(-svm.W[0] / svm.W[1])
        xx = np.linspace(-2.5, 2.5)
        yy = a * xx - float(svm.b / svm.W[1])
        self.data_loader.plot1(
            (0.0, 10.0),
            (float(-svm.b.numpy() / svm.W.numpy()[1]),
             float((-svm.b.numpy() - 10 * svm.W.numpy()[0]) / svm.W.numpy()[1])),
            color='black').show()

    def test_gaussian(self):
        def draw(x_vals, y_vals, show=True):
            class1_x = [x[0] for i, x in enumerate(x_vals) if y_vals[i] == 1]
            class1_y = [x[1] for i, x in enumerate(x_vals) if y_vals[i] == 1]
            class2_x = [x[0] for i, x in enumerate(x_vals) if y_vals[i] == -1]
            class2_y = [x[1] for i, x in enumerate(x_vals) if y_vals[i] == -1]
            if show:
                plt.plot(class1_x, class1_y, 'ro', label='I. setosa')
                plt.plot(class2_x, class2_y, 'kx', label='I. versicolor')
                # plt.plot(class3_x, class3_y, 'gv', label='I. virginica')
                plt.title('Gaussian SVM Results on Iris Data')
                plt.xlabel('Pedal Length')
                plt.ylabel('Sepal Width')
                plt.legend(loc='lower right')
                plt.show()
            return class1_x, class1_y, class2_x, class2_y

        (x_vals, y_vals) = sklearn.datasets.make_circles(n_samples=3000, factor=.5, noise=.1)
        y_vals = np.array([1.0 if y == 1.0 else -1.0 for y in y_vals], dtype=float)
        split_ratio = 0.9
        x_train, y_train = x_vals[0:int(len(x_vals) * split_ratio)], y_vals[0:int(len(y_vals) * split_ratio)]
        x_test, y_test = x_vals[int(len(x_vals) * split_ratio):], y_vals[int(len(y_vals) * split_ratio):]
        draw(x_train, y_train)
        draw(x_test, y_test)

        batch_size = 256
        epochs = 300
        svm = GaussianKernelSVM(batch_size=batch_size)
        optimizer = tf.keras.optimizers.SGD(0.001)
        train_dataset = self.gen_dataset(x_train, y_train, batch_size)
        test_dataset = self.gen_dataset(x_test, y_test, 5)

        # train
        def train_step(x_sample, y_sample):
            with tf.GradientTape() as tape:
                pred_kernel = svm(x_sample, x_sample)
                loss = svm.loss(y_sample, pred_kernel)
                accu, _ = svm.accu(y_sample, y_sample, pred_kernel)
            gradients = tape.gradient(loss, svm.trainable_variables)  # had to indent this!
            optimizer.apply_gradients(zip(gradients, svm.trainable_variables))
            return loss, accu

        for epoch in range(epochs):
            accus, losses = [], []
            for (batch, (x, y)) in enumerate(train_dataset):
                loss, accu = train_step(x_sample=x, y_sample=y)
                accus.append(accu.numpy())
                losses.append(loss.numpy())
            print("Epoch: {}, accu: {}, loss: {}".format(
                epoch, np.mean(accus), np.mean(losses)))

        # test
        rand_index = np.random.choice(len(x_vals), size=batch_size)
        rand_x = x_vals[rand_index]
        rand_y = tf.convert_to_tensor(np.transpose([y_vals[rand_index]]), dtype=tf.float32)
        accus = []
        for (batch, (x, y)) in enumerate(test_dataset):
            pred_kernel = svm(x, rand_x)
            accu, _ = svm.accu(y, rand_y, pred_kernel)
            accus.append(accu)
        print("test accuracy: {}".format(np.mean(accus)))
        self.assertGreater(np.mean(accus), 0.8)

        # plot results
        x_min, x_max = x_vals[:, 0].min() - 1, x_vals[:, 0].max() + 1
        y_min, y_max = x_vals[:, 1].min() - 1, x_vals[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                             np.arange(y_min, y_max, 0.02))
        grid_points = np.c_[xx.ravel(), yy.ravel()]
        output_kernel = svm(grid_points, rand_x)
        _, predictions = svm.accu(None, rand_y, output_kernel)
        grid_predictions = tf.reshape(predictions, xx.shape)

        # Plot points and grid
        class1_x, class1_y, class2_x, class2_y = draw(x_vals, y_vals, False)
        plt.contourf(xx, yy, grid_predictions, cmap=plt.cm.Paired, alpha=0.8)
        plt.plot(class1_x, class1_y, 'ro', label='Class 1')
        plt.plot(class2_x, class2_y, 'kx', label='Class -1')
        plt.title('Gaussian SVM Results')
        plt.xlabel('x')
        plt.ylabel('y')
        plt.legend(loc='lower right')
        plt.ylim([-1.5, 1.5])
        plt.xlim([-1.5, 1.5])
        plt.show()

    def test_amsvm(self):
        (x_train, y_train) = self.data_loader.loadIris2(0.8)
        svm = AMSVM(num_classes=3, num_feature=2, c=0.001)
        # optimizer = tf.keras.optimizers.Adam(0.1)
        # train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(50, drop_remainder=True).shuffle(
        #     50)

        # train
        # def train_step(x_sample, y_sample):
        #     with tf.GradientTape() as tape:
        #         output = svm(x_sample)
        #         loss = svm.loss(y_sample, output)
        #         accu = svm.accu(y_sample, output)
        #     gradients = tape.gradient(loss, svm.trainable_variables)  # had to indent this!
        #     optimizer.apply_gradients(zip(gradients, svm.trainable_variables))
        #     return loss, accu
        #
        # for epoch in range(400):
        #     accus, losses = [], []
        #     for (batch, (x, y)) in enumerate(train_dataset):
        #         loss, accu = train_step(x_sample=x, y_sample=y)
        #         accus.append(accu.numpy())
        #         losses.append(loss.numpy())
        #     print("Epoch: {}, accu: {}, loss: {}".format(epoch, np.mean(accus), np.mean(losses)))

        svm.compile(optimizer=tf.optimizers.Adam(0.1), loss=svm.loss, metrics=[svm.accu])
        svm.fit(x_train, y_train, batch_size=50, epochs=100)
                    help='# hidden nodes in generator')
parser.add_argument('--disc_hidden',
                    dest='disc_hidden',
                    type=int,
                    default=80,
                    help='# hidden nodes in discriminator')
parser.add_argument('--log_dir',
                    dest='log_dir',
                    type=str,
                    default='plot_1',
                    help='# log directory ')
args = vars(parser.parse_args())

data_dir = "data/cropped28/"
d = DataLoader(data_dir)
y = np.ones((50000))
X = d.get_original_images()
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

n = args['sample_size']
x_train = x_train[0:n]
x_test = x_test[n:n + n]
noise_factor = 0.1
    'lr': 0.001,
    'batch_size': 64,
    'z_size': 16,
    'max_grad_norm': 5,
    'top_k': 1,
    'word_dropout_p': 0.2,
    'kl_lss_anneal': True,
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    'model_name': 'trained_word_tVAE.model',
    'beta': 1,  # add an extra hyperparameter in front of kl_loss to prevent KL-loss collapse
}

if __name__ == '__main__':
    data_loader = DataLoader(Vocab('europarl_tvae', Level.WORD))
    level = word_level_params()
    level.params['vocab_size'] = data_loader.vocab.vocab_size
    model = tVAE(level.encoder_params, level.decoder_params, level.params)
    if USE_GPU:
        model = model.cuda()
    if model.have_saved_model:
        model.load()
    else:
        # train
        model.fit(data_loader)
        model.save()
from utils.Sampler import Sampler
from utils.plot import plotScatter
from utils import Expressions
from validation.Analyzer import Analyzer
from validation.ClusterValidator import ClusterValidator
from validation.ClassificationValidator import ClassificationValidator
from multiprocessing import Array

if __name__ == '__main__':
    print("Imported modules")
    dataLoader = DataLoader("dataset5")
    dimReducer = DimensionalityReducer()
    analyzer = Analyzer()
    clusVal = ClusterValidator()
    classVal = ClassificationValidator()
    sampler = Sampler()
    print("data loaded")

    #healthy = dataLoader.getData(["healthy"], ["THCA","LUAD"])
    #healthy = sampler.over_sample(healthy)

    start = datetime.now()
    sick = dataLoader.getData(["sick"], ["all"])
    healthy = dataLoader.getData(["healthy"], ["all"])
import math
import random
import time

import torch
from torch import nn
from torch.nn import init

from model.TransE import TransE
from utils.DataLoader import DataLoader
from utils.draw import draw

device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

loader = DataLoader(device, dataset='FB15k')
loader.load_all()
loader.preprocess(1, init=False)
loader.setup_sampling_map()

entity_dim = 50
load_previous = False

model = TransE(device,
               entity_size=loader.entity_size,
               rel_size=loader.relation_size,
               embed_dim=entity_dim,
               dataset='FB15k',
               margin=5)
# init.uniform_(model.entity_embedding.weight, -6.0/math.sqrt(entity_dim), 6.0/math.sqrt(entity_dim))
# init.uniform_(model.rel_embedding.weight, -6.0/math.sqrt(entity_dim), 6.0/math.sqrt(entity_dim))
import sys

from utils import readjson
from models.SimpleRergression import Linear
from utils.DataLoader import DataLoader
from models.Model import Model

if __name__ == '__main__':
    config = readjson(sys.argv[1])
    linear = Linear(**config['linear'])
    dataloader = DataLoader(**config['dataloader'])
    modal = Model(linear, dataloader, **config['modal'])
    modal.fit()
from tf.CycleGan import CycleGan
from utils.DataLoader import DataLoader
import numpy as np

imageShape = (32, 32, 3)
dl = DataLoader(path="../dataset", batchSize=10, imageSize=imageShape[0])
cgan = CycleGan(imageShape[0], imageShape[1], imageShape[2], tensorboard=True)

epochs = 100
modelSavePath = 'cgan_saved'
cgan.loadModel(modelSavePath)

for i in range(epochs):
    dataset = dl.getGenerater()
    for data in dataset:
        datasetX = np.array(data[0])
        datasetY = np.array(data[1])
        report = cgan.train_on_batch(datasetX=datasetX, datasetY=datasetY)
        print(report)
    cgan.saveModel(modelSavePath)
def createManagementCompanyData(request):
    logger.info("Creating management company Data")
    dataloader = DataLoader(request.cls.dataFile, env)
    dataloader.deleteManagementAndPropertySetup()
    dataloader.createManagementAndProperty()
    request.cls.testdata = dataloader.getData()
# file that contains embedding for each amino acid
embedding_file = "data/embeddings/embeddings.pkl"
# file that contains protein name and sequence
id2seq_file = "data/processed/" + dataset + "/protein.dictionary.tsv"
# file that contains interaction dataset
interactions_file = "data/processed/" + dataset + "/protein.actions.tsv"

# check the device type: CPU or GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Training on ", device)

# load sequences, and interactions data
dataloader = DataLoader(embedding_file, id2seq_file, interactions_file, max_seq_length=args.max_length)
seq2t = dataloader.seq2t
# number of amino acids in the sequences; add the padding index
dim = dataloader.dim + 1
# a dictionary that maps protein name to index
protname2index = dataloader.protname2index
# a dictionary that maps index to protein name
ind2protname = {v: k for k, v in protname2index.items()}
# convert the amino acid sequence to numeric tensor
seq_tensor, lengths = dataloader.convert_seq_to_tensor()
from utils.DataLoader import DataLoader
from utils.DimensionalityReducer import DimensionalityReducer
from utils.EA.crossover import *
from utils.EA.mutation import *
from utils.EA.algorithm import ea_for_plot, run
from utils.EA.fitness import fitness
from utils.EA.ea_utils import display_stat_1
from utils.EA.population import phenotype
import utils.EA.config as c
from utils import Expressions

print("import successful")

# %%
dataLoader = DataLoader("dataset4")
dimReducer = DimensionalityReducer()

healthy = dataLoader.getData(["healthy"], ["THCA", "LUAD"])
sick = dataLoader.getData(["sick"], ["THCA", "LUAD"])
gene_labels = dataLoader.getGeneLabels()
print("got data")

# %%
chromo_size = c.chromo_size
selected_genes = dimReducer.getNormalizedFeatures(sick, healthy, "substract", chromo_size, chromo_size)
print("preselected genes")

#%%
class GenericModel:
    def __init__(self, dirpath):
        self.optimizer = None
        self.learning_rate = None
        self.batch_size = None
        self.metrics = None
        self.loss = None
        self.epochs = None
        self.dirpath = dirpath
        self.tensorboard = self.create_tb_callbacks("./tensorboards/" + self.dirpath + '/' + type(self).__name__)
        self.name_model = os.getcwd() + '/saved_models/' + self.dirpath + '/' + type(self).__name__ + ".h5"
        self.data_gen = DataLoader(dirpath=dirpath, batch_size=self.batch_size, downsample_factor=0)
        self.data_gen.build_data()
        self.output_size = self.data_gen.get_output_size()
        img_w = self.data_gen.img_w
        img_h = self.data_gen.img_h
        if K.image_data_format() == 'channels_first':
            self.input_shape = (1, img_w, img_h)
        else:
            self.input_shape = (img_w, img_h, 1)

    @staticmethod
    def create_input(name, shape, dtype="float32"):
        return Input(name=name, shape=shape, dtype=dtype)

    @staticmethod
    def ctc_loss():
        return {'ctc': lambda y_true, y_pred: y_pred}

    @staticmethod
    def convolution_maxpooling(layer, conv_filters, kernel_size, name_conv, name_pool, pool_size,
                               padding='same', activation='relu', kernel_initializer='he_normal'):
        inner = Conv2D(conv_filters, kernel_size, padding=padding, activation=activation,
                       kernel_initializer=kernel_initializer, name=name_conv)(layer)
        return MaxPooling2D(pool_size=(pool_size, pool_size), name=name_pool)(inner)

    @staticmethod
    def bi_lstm(layer, h_size, name, return_sequences=True, kernel_initializer='he_normal', merge_method="add"):
        lstm_1 = LSTM(h_size, return_sequences=return_sequences, kernel_initializer=kernel_initializer,
                      name=name)(layer)
        lstm_1b = LSTM(h_size, return_sequences=return_sequences, go_backwards=True,
                       kernel_initializer=kernel_initializer, name=name + 'b')(layer)
        if merge_method == "add":
            return add([lstm_1, lstm_1b])
        elif merge_method == "concatenate":
            return concatenate([lstm_1, lstm_1b])
        elif merge_method is None:
            return lstm_1, lstm_1b
        else:
            print("You must give a method in order to merge the two directional layers")
            raise Exception

    @staticmethod
    def ctc_lambda_func(args):
        y_pred, labels, input_length, label_length = args
        y_pred = y_pred[:, 2:, :]
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    @staticmethod
    def from_conv_to_lstm_reshape(layer, name="reshape"):
        conv_to_rnn_dims = (layer.get_shape().as_list()[1],
                            layer.get_shape().as_list()[2] * layer.get_shape().as_list()[3])
        return Reshape(target_shape=conv_to_rnn_dims, name=name)(layer)

    @staticmethod
    def ctc_layer(y_pred, max_output_len, name_input_length, name_label, name_label_length, name_loss):
        labels = Input(name=name_label, shape=[max_output_len], dtype='float32')
        input_length = Input(name=name_input_length, shape=[1], dtype='int64')
        label_length = Input(name=name_label_length, shape=[1], dtype='int64')
        return labels, input_length, label_length, Lambda(GenericModel.ctc_lambda_func,
                                                          output_shape=(1,),
                                                          name=name_loss)([y_pred, labels, input_length, label_length])

    @staticmethod
    def create_tb_callbacks(tensorboard_dir):
        return TensorBoard(log_dir=tensorboard_dir, histogram_freq=0, write_graph=True, write_images=True)

    def build_model(self):
        raise NotImplementedError

    @staticmethod
    def load_model(loss, metrics, opt, name_model):
        model = load_model(name_model, compile=False)
        model.compile(loss=loss, optimizer=opt, metrics=metrics)
        return model

    def initialize_training(self):
        raise NotImplementedError

    def train(self, model, tensorboard_callback, loss, metrics, nb_epochs, save, opt, lr):
        model.compile(loss=loss, optimizer=opt(lr), metrics=metrics)
        history = model.fit_generator(generator=self.data_gen.next_batch(mode="train", batch_size=self.batch_size),
                                      steps_per_epoch=self.data_gen.n["train"],
                                      epochs=nb_epochs,
                                      callbacks=[tensorboard_callback],
                                      validation_data=self.data_gen.next_batch(mode="test", batch_size=self.batch_size),
                                      validation_steps=self.data_gen.n["test"])
        if save:
            print("saving model into : ")
            if not os.path.exists(os.getcwd() + '/saved_models/' + self.dirpath):
                os.makedirs(os.getcwd() + '/saved_models/' + self.dirpath)
            if os.path.exists(os.getcwd() + '/saved_models/' + self.dirpath + '/' + type(self).__name__ + ".h5"):
                print("model already saved a long time ago ")
                raise Exception
            model.save(os.getcwd() + '/saved_models/' + self.dirpath + '/' + type(self).__name__ + ".h5")
        return model, history

    def run_model(self, save=False, load=False):
        try:
            self.initialize_training()
        except Exception:
            print("you need to over-load the method initialize_training in your model ")
            raise Exception
        if self.optimizer is None:
            print("please provide an optimizer")
            raise Exception
        if self.learning_rate is None:
            print("please provide a learning_rate")
            raise Exception
        if self.metrics is None:
            print("please provide metrics")
            raise Exception
        if self.loss is None:
            print("please provide a loss function")
            raise Exception
        if self.epochs is None:
            print("please provide a number of epochs")
            raise Exception

        if load:
            print("Loading model")
            model = GenericModel.load_model(loss=self.loss, metrics=self.metrics, opt=self.optimizer,
                                            name_model=self.name_model)
            history = []
        else:
            model = self.build_model()
            with open(os.getcwd() + '/summaries/' + type(self).__name__ + '.txt', 'w') as fh:
                model.summary(print_fn=lambda x: fh.write(x + '\n'))
            model, history = self.train(model=model, tensorboard_callback=self.tensorboard, loss=self.loss,
                                        metrics=self.metrics, nb_epochs=self.epochs, save=save,
                                        opt=self.optimizer, lr=self.learning_rate)
            with open(os.getcwd() + '/logs/' + type(self).__name__ + '.json', 'w') as log_file:
                log = {}
                log["name"] = type(self).__name__
                log["batch_size"] = self.batch_size
                log["optimizer"] = self.optimizer.__name__
                log["learning_rate"] = self.learning_rate
                log["epochs"] = self.epochs
                log["nb_train"] = self.data_gen.n["train"]
                log["nb_test"] = self.data_gen.n["test"]
                log["data_dim"] = [int(self.input_shape[0]), int(self.input_shape[1]), int(self.input_shape[2])]
                var_log = {}
                for keys_indicators in history.history.keys():
                    var_log[keys_indicators] = history.history[keys_indicators]
                print(var_log)
                log["train"] = {}
                for i in range(self.epochs):
                    log["train"][str(i)] = {}
                    for keys_indicators in history.history.keys():
                        log["train"][str(i)][keys_indicators] = var_log[keys_indicators][i]
                log["max_values"] = {}
                for keys_indicators in history.history.keys():
                    log["max_values"][keys_indicators] = sorted(var_log[keys_indicators])[-1]
                json_string = model.to_json()
                log["summary"] = json_string
                json.dump(log, log_file, indent=4)
        return model, history
    # myClassifier.runRegularizationParameterAnalysis(first_guess = 0.000001, final_value = 0.002, increment=3)
    myClassifier.fit_model(drop=0, warm_up=False)
    myClassifier.get_report()


if __name__ == "__main__":
    import os
    import numpy as np
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    file_name = 'Impact5'
    dl = DataLoader(df=file_name,
                    split_size=0.2,
                    should_shuffle=True,
                    is_imbalanced=True,
                    random_state=65,
                    k=5,
                    n_top_features=20,
                    n_samples=None,
                    should_log_inverse=False,
                    modelling_type='c')

    # logistic_regression(file_name, dl)
    tree_classifier(file_name, dl)
    # svm_classifier(file_name, dl)
    # boostigs(file_name, dl)
    # knn_regressor(file_name, dl)
    # dnn_classifier(file_name, dl)
    # random_classifier(file_name, dl)
def createAccountIntelData(request):
    logger.info("Creating Module level data")
    dataloader = DataLoader(request.cls.dataFile, env)
    dataloader.deleteAccountIntelData()
    dataloader.createAccountIntelData()
from datetime import datetime

from utils.DataLoader import DataLoader
from validation.GridSearch import GridSearch

if __name__ == '__main__':
    start = datetime.now()
    print("Imported modules", flush=True)

    dataLoader = DataLoader("dataset4")
    print("data loaded", flush=True)

    healthy = dataLoader.getData(["healthy"], ["THCA", "LUAD"])
    sick = dataLoader.getData(["sick"], ["THCA", "LUAD"])
    data = dataLoader.getData(["sick", "healthy"], ["THCA", "LUAD"])
    grid_search = GridSearch(sick, healthy, data)
    print("got combined data", flush=True)

    table = grid_search.get_table_all_at_once()
    print("table creation done", flush=True)
    grid_search.save_table_to_disk(table, "grid_search_all_at_once_big")
    print("saved table to file", flush=True)

    table = grid_search.get_table_one_vs_rest()
    print("table creation done", flush=True)
    grid_search.save_table_to_disk(table, "grid_search_one_vs_rest_big")
# -*- coding: utf-8 -*-
"""
@Ref: https://www.cvxpy.org/examples/machine_learning/svm.html
@Author: xiezizhe
@Date: 17/2/2020 2:45 pm
"""
import cvxpy as cp
from utils.DataLoader import DataLoader
import numpy as np

if __name__ == "__main__":
    data_loader = DataLoader()
    (x_train, y_train), (x_test, y_test) = data_loader.loadIris1(0.8)

    n = 2
    m = len(x_train)
    W = cp.Variable((n, 1))
    b = cp.Variable()
    # hinge loss plus L1 regularization, solved as a convex program
    loss = cp.sum(
        cp.pos(1 - cp.multiply(np.reshape(y_train.numpy(), (m, 1)), x_train.numpy() @ W + b)))
    reg = cp.norm(W, 1)
    lambd = cp.Parameter(nonneg=True)
    prob = cp.Problem(cp.Minimize(loss / m + lambd * reg))
    lambd.value = 0.1
    prob.solve()
    print("{} * w + {}".format(W.value, b.value))

    data_loader.plot1((0.0, 10.0), (float(
        -b.value / W.value[1]), float(