def _get_buckets():
    """ Load the dataset into buckets based on their lengths.
    train_buckets_scale is the interval that'll help us choose a random bucket later on.
    """
    test_buckets = data.load_data('test_ids.enc', 'test_ids.dec')
    data_buckets = data.load_data('train_ids.enc', 'train_ids.dec')
    train_bucket_sizes = [len(data_buckets[b]) for b in xrange(len(config.BUCKETS))]
    print("Number of samples in each bucket:\n", train_bucket_sizes)
    train_total_size = sum(train_bucket_sizes)
    # list of increasing numbers from 0 to 1 that we'll use to select a bucket.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]
    print("Bucket scale:\n", train_buckets_scale)
    return test_buckets, data_buckets, train_buckets_scale
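# A minimal sketch (an assumption, not necessarily the author's exact helper) of
# how the cumulative train_buckets_scale built above is typically used later to
# draw a random bucket id for the next training batch:
import random

def _get_random_bucket(train_buckets_scale):
    """Sample a bucket index with probability proportional to its share of the data."""
    rand = random.random()
    return min(i for i in range(len(train_buckets_scale))
               if train_buckets_scale[i] > rand)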
def predict(): """ An example of how to load a trained model and use it to predict labels. """ # loads the saved model classifier = pickle.load(open(os.path.join(os.path.split(__file__)[0], 'best_model.pkl'))) # compile a predictor function predict_model = theano.function( inputs = [classifier.input], outputs = classifier.y_pred) # We can test it on some examples from test set dataset = 'mnist.pkl.gz' datasets = load_data(dataset) test_set_x, test_set_y = datasets[2] # test_set_x = test_set_x.get_value() predicted_values = predict_model(test_set_x[:10]) print("Predicted values for the first 10 examples in test set:") print(predicted_values) return 1
def learn():
    clf = linear_model.SGDClassifier(penalty="l2", l1_ratio=0, alpha=0.001,
                                     class_weight={1: 0.3, 0: 0.7}, n_jobs=3)
    rd = 100 * 1000
    iter_num = 4
    for i in range(iter_num):
        print "round", i
        train = load_data("train", rd)
        train_label = load_label("train")
        train_label = np.array(train_label)
        count = 0
        for ptrain in train:
            print "partial", count
            plabel = train_label[:rd]
            train_label = train_label[rd:]
            if sum(plabel) > 0.2 * len(plabel):
                print "number of positive examples", sum(plabel)
                assert len(ptrain) == len(plabel)
                clf.partial_fit(ptrain, plabel, classes=[0, 1])
            else:
                break
            count += 1
        print 100 * "="
        print "train_label", len(train_label)
    return clf
def main(): from argparse import ArgumentParser argparser = ArgumentParser() argparser.add_argument('team') argparser.add_argument('--username', default='asdf7001') argparser.add_argument('--password', default='seleniumpython') argparser.add_argument('--iterations', type=int, default=1) argparser.add_argument('--monitor_url', type=str, default='http://54.149.105.175:9000') argparser.add_argument('--challenge', type=str) argparser.add_argument('--proxy', action='store_true') argparser.add_argument('--browser', type=str, default='phantomjs') argparser.add_argument('--data_dir', type=str, default='data/') argparser.add_argument('--lib_dir', type=str, default='lib/') argparser.add_argument('--kernel_dir', type=str, default='kernel/') argparser.add_argument('--kernel', action='store_true') argparser.add_argument('--predictor', default='PokeFrequencyPredictor') argparser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") args = argparser.parse_args() with open(args.team) as fp: team_text = fp.read() pokedata = load_data(args.data_dir) showdown = Showdown( team_text, MonteCarloAgent(20, pokedata), args.username, pokedata, browser=args.browser, password=args.password, ) showdown.run(args.iterations, challenge=args.challenge)
def problem2_4_1():
    data, featureNames = load_data()
    kValues = [5, 10, 20]
    for k in kValues:
        iniCenters = orderedCenters(data, k)
        sses = doExperiment(k, data, initialCenters=iniCenters)
        print "SSES for %d: %f" % (k, sses)
def draw_contours():
    print "# Trace contours"
    rows = 5
    cols = 5
    plt.rcParams["figure.figsize"] = (cols * 5, rows * 5)
    fig, axes = plt.subplots(ncols=cols, nrows=rows)
    fig.subplots_adjust(hspace=0, wspace=0)
    for c in range(0, cols):
        krange = arange(0.01, 0.99, 0.01)
        k = float(c) / float(cols) * 0.99 + 0.01
        print "# Load data", k
        volume = data.load_data(k, smoothness=2)
        volume = rot90(volume, 1, [0, 1])
        lim = len(volume) / 2
        for r in range(0, rows):
            rp = float(r) / float(rows)
            zind = int(rp * lim)
            z = rp * (ZMAX-ZMIN) + ZMIN
            print "LAYER", r, zind, len(volume), len(axes)
            layer = volume[zind + 1]
            draw_slice(layer, axes[r, c], z, k)
    plt.savefig('contours.png', dpi=100, bbox_inches='tight', pad_inches=0)
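# draw_slice is not defined in this snippet; a hypothetical sketch of a helper
# with that signature, assuming it traces contour lines of one z-slice onto the
# given axis and labels it with the slice height z and the parameter k:
def draw_slice(layer, ax, z, k):
    ax.contour(layer, levels=10)
    ax.set_title("z = %.2f, k = %.2f" % (z, k))
    ax.set_xticks([])
    ax.set_yticks([])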
def train(limit=-1, net_id=0, train_dir=DEFAULT_TRAINING_DATA, labels_dir=DEFAULT_LABELS, size=256): if limit == -1: limit_str = "All" else: limit_str = limit print "{}\tLoading {} images in {}".format(now(), limit, train_dir) files, labels = data.load_data(train_dir, labels_dir, verbose=0, limit=limit, size=size) net = get_net(net_id) if not net: print '{}\tNo network with id {}'.format(now(), net_id) return start_timestamp = int(time.time()) print "{}\tInitialising network...".format(now()) net.fit(files, labels) end_timestamp = int(time.time()) duration = end_timestamp - start_timestamp print "{}\tTraining complete, total duration {} seconds".format(now(), duration) file_name = '{}_{}_{}_NET{}'.format(end_timestamp, duration, limit, net_id) f = open(file_name, 'w+') print "{}\tSaving file to {}".format(now(), file_name) cPickle.dump(net, f, protocol=cPickle.HIGHEST_PROTOCOL) f.close()
def problem2_4_3():
    data, featureNames = load_data()
    ssesValues = [0]  # dummy first value
    for k in range(1, 51):
        sses = doExperiment(k, data)
        ssesValues.append(sses)
        print "SSES for %d: %f" % (k, sses)
    graphSS(ssesValues)
def main():
    # Training data consists of 60000 images and 60000 labels.
    # Testing data consists of 10000 images and 10000 labels.
    # Each image consists of 784 (28x28) pixels, each of which contains a value from
    # 0 to 255.0 which corresponds to its darkness or lightness.
    # Each input needs to be a list of numpy arrays to be valid.

    # Load all of the data
    print "Loading data..."
    test_images = data.load_data(LIMITED)
    train_images = data.load_data(LIMITED, "train-images.idx3-ubyte", "train-labels.idx1-ubyte")

    print "Normalizing data..."
    X_train, Y_train = data.convert_image_data(train_images)
    X_test, Y_test = data.convert_image_data(test_images)
    X_train = np.array(X_train)
    Y_train = np.array(Y_train)
    X_test = np.array(X_test)
    Y_test = np.array(Y_test)

    if LOAD == False:
        print "Building the model..."
        _model = model.build()
    else:
        print "Loading the model..."
        elements = os.listdir("model")
        if len(elements) == 0:
            print "No models to load."
        else:
            _model = model.load(elements[len(elements)-1])

    if TRAIN == True:
        print "Training the model..."
        model.train(_model, X_train, Y_train, X_test, Y_test)

    if VISUALIZE:
        model.visualize(_model, test_images, VISUALIZE_TO_FILE)

    if TRAIN == True:
        print "Saving the model..."
        model.save(_model)
def main(): # network_architecture = dict(n_hidden_recog_1=4000, n_hidden_recog_2=1000, n_hidden_gener_1=1000, n_hidden_gener_2=4000, n_input=12562, n_z=500) network_architecture = dict(n_hidden_recog_1=4000, # 1st layer encoder neurons n_hidden_recog_2=1000, # 2nd layer encoder neurons n_hidden_gener_1=1000, # 1st layer decoder neurons n_hidden_gener_2=4000, # 2nd layer decoder neurons n_input=12562, n_z=500) data = load_data()[0:1000, :] vae = train(data, network_architecture, training_epochs=75)
def loadData(): sys.path.append('/home/hey/Desktop/MachineLearning-master/DeepLearning Tutorials/dive_into_keras/') from data import load_data nb_epoch = 5 batch_size = 100 nb_class = 10 data, label = load_data() label = np_utils.to_categorical(label, nb_class)
def train(hps):
    """Training loop."""
    model = vgg_model.VGG(hps, FLAGS.mode)
    model.build_graph()

    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)
    saver = tf.train.Saver(max_to_keep=0)
    sv = tf.train.Supervisor(logdir=FLAGS.log_root,
                             is_chief=True,
                             summary_op=None,
                             save_summaries_secs=60,
                             save_model_secs=600,
                             saver=saver,
                             global_step=model.global_step)
    sess = sv.prepare_or_wait_for_session()
    summary_writer.add_graph(sess.graph)

    step = 0
    lrn_rate = 1e-4
    dataset = load_data().train
    while not sv.should_stop():
        data, labels = dataset.next_batch(32)
        print step,
        (_, summaries, loss, predictions, truth, train_step) = sess.run(
            [model.train_op, model.summaries, model.cost, model.predictions,
             model.labels, model.global_step],
            feed_dict={model.lrn_rate: lrn_rate, model._images: data, model.labels: labels})

        if step < 10000:
            lrn_rate = 1e-4
        elif step < 20000:
            lrn_rate = 1e-6
        elif step < 30000:
            lrn_rate = 1e-8
        else:
            lrn_rate = 1e-10

        step += 1
        if step % 1 == 0:
            summary_writer.add_summary(summaries, train_step)
            print "Step: %d, Loss: %f" % (train_step, loss)
            summary_writer.flush()
            # print predictions.shape, labels.shape
        if step == 100000:
            break

    sv.stop()
def predict():
    data, label = load_data()
    index = [i for i in range(len(data))]
    random.shuffle(index)
    data = data[index]
    label = label[index]
    (traindata, testdata) = (data[0:30000], data[30000:])
    (trainlabel, testlabel) = (label[0:30000], label[30000:])
    pred_testlabel = model.predict_classes(testdata, batch_size=1, verbose=1)
    num = len(testlabel)
    accuracy = len([1 for i in range(num) if testlabel[i] == pred_testlabel[i]]) / float(num)
    print('model accuracy', accuracy)
def funcnn(LR, BS):
    data, label = load_data()
    label = np_utils.to_categorical(label, 10)

    model = Sequential()
    model.add(Convolution2D(4, 1, 5, 5, border_mode='valid'))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Convolution2D(8, 4, 3, 3, border_mode='valid'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Convolution2D(16, 8, 3, 3, border_mode='valid'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(16*4*4, 256, init='normal'))
    model.add(Activation('tanh'))
    model.add(Dense(256, 10, init='normal'))
    model.add(Activation('softmax'))

    sgd = SGD(l2=0.001, lr=LR, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, class_mode="categorical")
    #checkpointer = ModelCheckpoint(filepath="weight.hdf5",verbose=1,save_best_only=True)
    #model.fit(data, label, batch_size=100,nb_epoch=10,shuffle=True,verbose=1,show_accuracy=True,validation_split=0.2,callbacks=[checkpointer])
    result = model.fit(data, label, batch_size=BS, nb_epoch=20, shuffle=True,
                       verbose=1, show_accuracy=True, validation_split=0.2)
    #model.save_weights(weights,accuracy=False)

    # plot the result
    plt.figure()
    plt.plot(result.epoch, result.history['acc'], label="acc")
    plt.plot(result.epoch, result.history['val_acc'], label="val_acc")
    plt.scatter(result.epoch, result.history['acc'], marker='*')
    plt.scatter(result.epoch, result.history['val_acc'])
    plt.legend(loc='lower right')
    plt.show()

    plt.figure()
    plt.plot(result.epoch, result.history['loss'], label="loss")
    plt.plot(result.epoch, result.history['val_loss'], label="val_loss")
    plt.scatter(result.epoch, result.history['loss'], marker='*')
    plt.scatter(result.epoch, result.history['val_loss'], marker='*')
    plt.legend(loc='upper right')
    plt.show()
def main(): from argparse import ArgumentParser argparser = ArgumentParser() argparser.add_argument('team1') argparser.add_argument('team2') argparser.add_argument('--depth', type=int, default=2) argparser.add_argument('--gamestate', type=str) argparser.add_argument('--player', type=int, default=0) args = argparser.parse_args() pokedata = load_data("data") players = [None, None] # players[args.player] = HumanAgent() players[args.player] = PessimisticMinimaxAgent(2, pokedata, log_file="normal.txt") # players[1 - args.player] = PessimisticMinimaxAgent(2, pokedata, log_file="no_cache.txt", use_cache=False) # players[1 - args.player] = DumbAgent() players[1 - args.player] = HumanAgent() with open(args.team1) as f1, open(args.team2) as f2, open("data/poke2.json") as f3: data = json.loads(f3.read()) poke_dict = Smogon.convert_to_dict(data) teams = [Team.make_team(f1.read(), poke_dict), Team.make_team(f2.read(), poke_dict)] gamestate = GameState(teams) if args.gamestate is not None: with open(args.gamestate, 'rb') as fp: gamestate = pickle.load(fp) with open('cur2.gs', 'wb') as fp: pickle.dump(gamestate, fp) gamestate.create_gamestate_arff(0) gamestate.print_readable_data(0) gamestate.print_readable_data(1) simulator = Simulator(pokedata) while not gamestate.is_over(): print "==========================================================================================" print "Player 1 primary:", gamestate.get_team(0).primary(), gamestate.get_team(0).primary().status print "Player 2 primary:", gamestate.get_team(1).primary(), gamestate.get_team(1).primary().status print "" my_action = players[0].get_action(gamestate, 0) opp_action = players[1].get_action(gamestate, 1) gamestate = simulator.simulate(gamestate, [my_action, opp_action], 0, log=True) if gamestate.get_team(0).alive(): print "You win!" print "Congrats to", gamestate.opp_team print "Sucks for", gamestate.my_team else: print "You lose!" print "Congrats to", gamestate.my_team print "Sucks for", gamestate.opp_team gamestate.turn += 1
def problem2_4_4():
    data, featureNames = load_data()
    k = 6
    iniCenters = orderedCenters(data, k)
    clusters, centers = kmeans(data, k, initialCenters=iniCenters)
    avgAll = avg(data.keys(), data)
    print "average: %s" % avgAll
    print "########################"
    print "cluster # -> (distance from average)"
    print "    [# in cluster]:[items in cluster]"
    print ""
    for c in clusters:
        print "cluster %d -> %s: " % (c, distance(avgAll, centers[c]))
        print "    [%d]:%s " % (len(clusters[c]), clusters[c])
        print ""
def _get_tag_list(self, msg): content_type, chat_type, chat_id = telepot.glance(msg) if content_type != "photo": return None file_id = msg["photo"][-1]["file_id"] (rating, character, copyright, general) = ("", set(), set(), set()) db_data = data.load_data(chat_id, file_id) if db_data: rating = db_data["rating"] character = db_data["character"] copyright = db_data["copyright"] general = db_data["general"] return (rating, character, copyright, general) handler_name = "" with tempfile.NamedTemporaryFile() as f: self.download_file(file_id, f.name) for h in handlers_list: res = h.run(f) if res: (rating, character, copyright, general) = res handler_name = h.HANDLER_NAME break else: # Rewind file for reading with other handler. f.seek(0) if not res: raise InferenceError("Cannot do inference on this image") data.save_data(chat_id, file_id, { "rating": rating, "character": character, "copyright": copyright, "general": general, "time": msg["date"], "handler": handler_name }) return (rating, character, copyright, general)
def plot(element, dataset='mnist.pkl.gz'):
    autoencoder = pickle.load(open(os.path.join(os.path.split(__file__)[0], 'autoencoder.pkl')))

    if element == 'reconstructions':
        print('... plot reconstructions')
        datasets = load_data(dataset)
        test_set_x, test_set_y = datasets[2]
        rec = theano.function([autoencoder.x], autoencoder.z)
        image = Image.fromarray(tile_raster_images(X=rec(test_set_x[:100]),
                                                   img_shape=(28, 28),
                                                   tile_shape=(5, 20),
                                                   tile_spacing=(1, 1)))
        image.save(os.path.join(os.path.split(__file__)[0], 'autoencoderrec.png'))
    elif element == 'repflds':
        print('... plot receptive fields')
        image = Image.fromarray(tile_raster_images(X=autoencoder.W.get_value(borrow=True).T,
                                                   img_shape=(28, 28),
                                                   tile_shape=(5, 20),
                                                   tile_spacing=(1, 1)))
        image.save(os.path.join(os.path.split(__file__)[0], 'autoencoderfilter.png'))
    else:
        print("don't know how to plot %s" % element)
        print("either use 'reconstructions' or 'repflds'")
        return -1
def xgb_800_without_mnk(): data, target = load_data() data, target, labels = normalize_data(data, target) X_train, X_test, y_train, y_test = train_test_split(data, target) reg = xgb.XGBRegressor(max_depth=12, learning_rate=0.007, n_estimators=800) reg.fit(X_train, y_train, eval_metric=eval_error, eval_set=[(X_train, y_train), (X_test, y_test)]) bst = reg.booster() fscore = bst.get_fscore() submit_model(reg) print sorted(fscore.iteritems(), key=lambda b: b[1], reverse=True) train_err = reg.evals_result_['validation_0']['error'] test_err = reg.evals_result_['validation_1']['error'] ind = np.arange(len(train_err)) plt.figure() plt.plot(ind, train_err, label='train') plt.plot(ind, test_err, label='test') plt.ylim([0.0, 0.2]) plt.legend(loc='upper left') plt.show()
def train(dataset = 'mnist.pkl.gz'): dataset = load_data(dataset) data = dataset[0][0].astype('float64') start_time = timeit.default_timer() results = np.zeros((data.shape[0], 2)) print('... training barnes-Hut tsne') for res, save in zip(bh_tsne(np.copy(data), theta = 0.5), results): save[...] = res end_time = timeit.default_timer() print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fs' % (end_time - start_time)), file = sys.stderr) with open(os.path.join(os.path.split(__file__)[0], 'data.pkl'), 'wb') as f: pickle.dump(results, f) results = results - np.min(results, axis = 0) results = results / np.max(results, axis = 0)
def predict(classifier): """ An example of how to load a trained model and use it to predict labels. """ # compile a predictor function predict_model = theano.function( inputs = [classifier.input], outputs = classifier.logRegressionLayer.y_pred) # We can test it on some examples from test set dataset = 'mnist.pkl.gz' datasets = load_data(dataset) test_set_x, test_set_y = datasets[2] # test_set_x = test_set_x.get_value() predicted_values = predict_model(test_set_x[:10]) print("Predicted values for the first 10 examples in test set:") print(predicted_values) return 1
def predict_per_cpu_full():
    data, target = load_data()
    data, target, labels = normalize_data(data, target)
    data = data[['C0', 'cpuFull']]
    data['target'] = target

    split_by_types = dict()
    cpu_groups = data.groupby('cpuFull')
    for name, group in cpu_groups:
        X_train, X_test, y_train, y_test = train_test_split(group['C0'].reshape(-1, 1),
                                                            group['target'])
        split_by_types[str(name)] = {
            'train': {'data': X_train, 'target': y_train},
            'test': {'data': X_test, 'target': y_test}
        }
    # print split_by_types

    summ = 0.0
    for cpu, data_set in split_by_types.iteritems():
        plt.figure()
        # reg = SGDRegressor(loss='huber', n_iter=100, alpha=0.0)
        reg = RandomForestRegressor(n_estimators=5)
        reg.fit(data_set['train']['data'], data_set['train']['target'])
        test_data = data_set['test']['data']
        y_pred = reg.predict(test_data)
        print mape(data_set['test']['target'], y_pred), cpu
        plt.scatter(test_data, data_set['test']['target'], s=3, color='g', label='actual')
        plt.scatter(test_data, y_pred, s=3, color='r', label='predicted')
        plt.legend(loc='upper left')
        plt.ylabel('mul time')
        plt.title('Category: {}'.format(cpu))
        plt.savefig('imgs/{}.png'.format(cpu))
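# mape is not defined in this snippet; a minimal sketch of a typical mean
# absolute percentage error helper it could correspond to (an assumption, not
# necessarily the author's exact implementation):
import numpy as np

def mape(y_true, y_pred):
    """Mean absolute percentage error, in percent."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100.0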
def plot(dataset='mnist.pkl.gz'):
    size = 10000
    print('... plotting the results')
    dataset = load_data(dataset)
    data = dataset[0][0].astype('float64')
    results = pickle.load(open(os.path.join(os.path.split(__file__)[0], 'data.pkl')))
    results = results - np.min(results, axis=0)
    results = results / np.max(results, axis=0)

    out = np.zeros((size, size), dtype='uint8')
    out[...] = 255
    for i in xrange(data.shape[0]):
        xpos = int(results[i][0] * (size - 1000) + 500)
        ypos = int(results[i][1] * (size - 1000) + 500)
        pic = scale_to_unit_interval(data[i].reshape((28, 28)))
        out[xpos:xpos + 28, ypos: ypos + 28] = pic * 255

    print('... saving to file ' + os.path.join(os.path.split(__file__)[0], 'tsne_mnist.png'))
    image = Image.fromarray(out)
    image.save(os.path.join(os.path.split(__file__)[0], 'tsne_mnist.png'))
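# scale_to_unit_interval is used above but not shown; a sketch of the usual
# helper (as in the deeplearning.net tutorial utilities) that rescales an
# array into the [0, 1] range:
import numpy as np

def scale_to_unit_interval(ndar, eps=1e-8):
    """Scale all values of a numpy array to lie between 0 and 1."""
    ndar = ndar.copy().astype('float64')
    ndar -= ndar.min()
    ndar *= 1.0 / (ndar.max() + eps)
    return ndar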
import os

from data import load_data
from sklearn.decomposition import FactorAnalysis

try:
    import cPickle as pickle
except ImportError:
    import pickle

# Factor Analysis
# ================================================================
# Apply factor analysis on the tf-idf matrix and transform raw documents into
# an intermediate representation.
docs_tfidf, vocab_tfidf, vocabulary = load_data(subset='all')

n_components = 40
fa = FactorAnalysis(n_components=n_components)
fa.fit(docs_tfidf.toarray())
fa_words = fa.transform(vocab_tfidf.toarray())

# Create a dict to hold the new factor-analysis words.
fa_dict = dict(zip(vocabulary, fa_words))

# Store the intermediate-representation words on disk.
fa_dict_filename = 'fa_dict.pk'
if not os.path.exists(fa_dict_filename):
    fa_dict_file = open(fa_dict_filename, 'wb')
    pickle.dump(fa_dict, fa_dict_file)

# Store the estimator on disk for further usage.
fa_estimator_filename = 'fa_estimator.pk'
# print "tf.nn.l2_loss(b2).eval()", tf.nn.l2_loss(b2).eval() # print "tf.nn.l2_loss(W3).eval() * 5e-8", tf.nn.l2_loss(W3).eval() * 5e-8 # print "tf.nn.l2_loss(b3).eval()", tf.nn.l2_loss(b3).eval() # regularizers = (tf.nn.l2_loss(W1) * 5e-8 + tf.nn.l2_loss(b1) + tf.nn.l2_loss(W2) * 5e-8 + tf.nn.l2_loss(b2) + tf.nn.l2_loss(W3) * 5e-8 + tf.nn.l2_loss(b3)) # print "regularizers", regularizers.eval() # print "5e-4 * regularizers", 5e-4 * regularizers.eval() if (step % 500 == 0): print("Minibatch loss at step %d: %f" % (step, l)) print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels)) print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels)) print("Time taken:", int(time.time() - start_time)) if __name__ == "__main__": _, labels = load_data(True) # lda data (x_lda.p) was created using: # data = LDA().fit(data.toarray(), labels.nonzero()[1]).transform(data.toarray()) # pca data (x_pca.p) was created using: # data = PCA().fit(data_X.toarray()).transform(data_X.toarray()) data = pickle.load(open("x_lda.p", "rb")).astype(np.float32) n = 360 # trials per person num_subjects = 9 # num people for i in range(num_subjects): print "\n=== Using subject", i + 1, "as test set." data = np.concatenate((data[n:], data[:n]), axis=0) labels = np.concatenate((labels[n:], labels[:n]), axis=0) train(data, labels)
################### ################### ## Main ################### force_utf8_hack() os.system("rm %s" % (DONE_IND_FILE)) os.system("rm %s" % (KILLED_IND_FILE)) os.system("touch %s" % (RUNNING_IND_FILE)) ################### ## Read IP to Geolocation Database ################### if DEBUG2: print "Read IP to Geolocation Database" located_ips = data.load_data(geo_db_dir + ipinfo_db_filename) IF_DATA_READ = 1 if DEBUG3: print " #records=%d" % (len(located_ips)) if DEBUG4: for ip in located_ips: # print located_ips[ip] if 'loc' in located_ips[ip]: print " %s, %s" % (ip, located_ips[ip]['loc']) else: print " %s\n " % (ip) print located_ips[ip] # exit() ###################
if os.path.exists(PARAM_FILE_AUTH): auths = list_data.load_data(PARAM_FILE_AUTH) elif os.path.exists(dns_dir + dns_filename): auths = list_data.load_data(dns_dir + dns_filename) else: auths = list_data.load_data(cname_dir + auth_filename) if DEBUG3: print " #cnames=%d" % (len(cnames)) if DEBUG3: print " #auth dns=%d" % (len(auths)) ################### ## read done IPs ################### if DEBUG2: print "Read Done IPs" ips = data.load_data(ips_dir + ip_dict_filename) IF_DATA_READ = 1 if DEBUG3: print " #cnames=%d" % (len(ips)) ################### ## DNS Query ################### if DEBUG2: print "DNS Query" ################### ## DEBUG # cname = "0.site.51zx.com" # cnames = ["0.site.51zx.com"] # auths = ["DNS.RAIDC.COM", "agri-dns02.agri.org.cn", "agri-dns02.agri.org.cn"] # exit()
# get cmd line args from user <- config file if len(sys.argv) > 1: # get configuration g = utils.get_config(sys.argv[1]) # check if we need to process text file if g['data_file'] and utils.isEmpty(g['ckpt_path']): # check if folder exists, if not create folder utils.assert_dir(g['data_path']) data.process_data(filename= g['data_file'], path= g['data_path']) # check if checkpoint path exists utils.assert_dir(g['ckpt_path']) try: # fetch dataset X, Y, idx2ch, ch2idx = data.load_data(path=g['data_path']) except: print('\n>> Is the folder {} empty?'.format(g['ckpt_path'])) print('Shouldn\'t it be?') sys.exit() # training set batch generator trainset = utils.rand_batch_gen(X, Y, batch_size= g['batch_size']) # build the model num_classes = len(idx2ch) net = char_rnn.CharRNN(seqlen= X.shape[-1], num_classes= num_classes, num_layers= g['num_layers'], state_size= g['state_size'], epochs= 100000000,
import glob from collections import Counter sys.path.append('..') from data import load_data coords = np.load('../../data/subset_conjecture_coords.npy') counts = [np.sum(np.abs(c)) for c in coords] plt.figure() plt.title('Premise Use Histogram') sns.countplot(counts) print("zeros: {}".format(sum([1 if x == 0 else 0 for x in counts]))) print() print() cts = load_data('../../data/model/train_conjecture_unique_tokens.data') counts = list(cts.values()) plt.figure() plt.title('Conjecture Token Occurrences Histogram') sns.countplot(counts) cts = load_data('../../data/model/train_premise_unique_tokens.data') counts = list(cts.values()) plt.figure() plt.title('Premise Token Occurrences Histogram') sns.countplot(counts) print() print() def iter_premise_subtrees():
def index():
    data = load_data()
    print(data)
    return render_template('dashboard_view.html', data=data)
import data
import numpy as np
import tensorflow as tf
import time
import tempfile
import zipfile
import os
#import tflite_runtime.interpreter as tflite

(x_train, y_train), (x_test, y_test) = data.load_data()
print("Train data: ", x_train.shape)
print("Train labels: ", y_train.shape)
print("Test data: ", x_test.shape)
print("Test labels: ", y_test.shape)

model_File = "./coralmodels/perforated/"
model_Name = "prunedlite"
model_Path = os.path.join(model_File, model_Name + '.tflite')

interpreter = tf.lite.Interpreter(model_path=model_Path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

test = x_test.reshape(10000, 1, 32, 32, 3)
output_data = np.zeros([10000, 10])
print(y_test.shape)
print(output_data.shape)
print("start test")
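# The per-image inference loop is not shown above; a minimal sketch of how it
# might continue, assuming the model takes a single float32 image of shape
# (1, 32, 32, 3) and produces 10 class scores:
for i in range(len(test)):
    sample = test[i].astype(np.float32)  # assumes a float32 input tensor
    interpreter.set_tensor(input_details[0]['index'], sample)
    interpreter.invoke()
    output_data[i] = interpreter.get_tensor(output_details[0]['index'])[0]

predictions = np.argmax(output_data, axis=1)
print("Test accuracy:", np.mean(predictions == y_test.flatten()))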
args.device = device logger.setLevel(logging.INFO) fmt = logging.Formatter('%(asctime)s: [ %(message)s ]', '%m/%d/%Y %I:%M:%S %p') console = logging.StreamHandler() console.setFormatter(fmt) logger.addHandler(console) logfile = logging.FileHandler(args.log_file, 'a') logfile.setFormatter(fmt) logger.addHandler(logfile) if args.test: test_graph = load_data(args.test_file) model = torch.load(args.load_model) model.device = device dev_dataset = TriviaQADataset(test_graph, model, False) dev_loader = DataLoader(dataset=dev_dataset, batch_size=1, collate_fn=batcher(device), shuffle=False, num_workers=0) model.to(device) score, total_list = evaluate(dev_loader, model) exit() train_graph = load_data(args.train_file) dev_graph = load_data(args.dev_file)
from sklearn.neighbors import KNeighborsRegressor
import numpy as np
import pandas as pd

import data


def predict(X_train, y_train, X_test):
    clf = KNeighborsRegressor(n_neighbors=11)
    clf.fit(X_train, y_train)
    result = clf.predict(X_test).reshape(-1, 1)
    return result


if __name__ == "__main__":
    X_train, y_train, X_test, id = data.load_data()
    result = np.expm1(predict(X_train, y_train, X_test))
    ans = np.hstack((id, result))
    ans = pd.DataFrame(ans, columns=['Id', 'SalePrice'])
    ans['Id'] = ans['Id'].astype('Int32')
    ans.to_csv('submission.csv', index=False)
def shoot_mpc_qp(roll_idx): print(f'\n=== Model Based Control on Example {roll_idx} ===') ''' load data ''' seq_data = load_data(prepared_names, os.path.join(data_dir, str(roll_idx) + '.rollout.h5')) attrs, states, actions, rel_attrs = [ to_var(d.copy(), use_gpu=use_gpu) for d in seq_data ] seq_data = denormalize(seq_data, stat) attrs_gt, states_gt, actions_gt = seq_data[:3] ''' setup engine ''' param_file = os.path.join(data_dir, str(roll_idx // args.group_size) + '.param') param = torch.load(param_file) engine.init(param) n_obj = engine.num_obj ''' fit koopman ''' print('===> system identification!') fit_data = get_more_trajectories(roll_idx) fit_data = [to_var(d, use_gpu=use_gpu) for d in fit_data] bs = args.fit_num attrs_flat = get_flat(fit_data[0]) states_flat = get_flat(fit_data[1]) actions_flat = get_flat(fit_data[2]) rel_attrs_flat = get_flat(fit_data[3]) g = model.to_g(attrs_flat, states_flat, rel_attrs_flat, args.pstep) g = g.view(torch.Size([bs, args.time_step]) + g.size()[1:]) G_tilde = g[:, :-1] H_tilde = g[:, 1:] U_left = fit_data[2][:, :-1] G_tilde = get_flat(G_tilde, keep_dim=True) H_tilde = get_flat(H_tilde, keep_dim=True) U_left = get_flat(U_left, keep_dim=True) A, B, fit_err = model.system_identify(G=G_tilde, H=H_tilde, U=U_left, rel_attrs=fit_data[3][:1, 0], I_factor=args.I_factor) ''' shooting ''' print('===> model based control start!') # current can not set engine to a middle state assert args.roll_start == 0 start_step = args.roll_start g_start_v = model.to_g(attrs=attrs[start_step:start_step + 1], states=states[start_step:start_step + 1], rel_attrs=rel_attrs[start_step:start_step + 1], pstep=args.pstep) g_start = to_np(g_start_v[0]) if args.env == 'Rope': goal_step = args.roll_step + args.roll_start elif args.env == 'Soft': goal_step = args.roll_step + args.roll_start elif args.env == 'Swim': goal_step = args.roll_step + args.roll_start g_goal_v = model.to_g(attrs=attrs[goal_step:goal_step + 1], states=states[goal_step:goal_step + 1], rel_attrs=rel_attrs[goal_step:goal_step + 1], pstep=args.pstep) g_goal = to_np(g_goal_v[0]) states_start = states_gt[start_step] states_goal = states_gt[goal_step] states_roll = np.zeros((args.roll_step + 1, n_obj, args.state_dim)) states_roll[0] = states_start control = np.zeros((args.roll_step + 1, n_obj, args.action_dim)) # control_v = to_var(control, use_gpu, requires_grad=True) bar = ProgressBar() for step in bar(range(args.roll_step)): states_input = normalize([states_roll[step:step + 1]], [stat[1]])[0] states_input_v = to_var(states_input, use_gpu=use_gpu) g_cur_v = model.to_g(attrs=attrs[:1], states=states_input_v, rel_attrs=rel_attrs[:1], pstep=args.pstep) g_cur = to_np(g_cur_v[0]) ''' setup parameters ''' T = args.roll_step - step + 1 A_v, B_v = model.A, model.B A_t = to_np(A_v[0]).T B_t = to_np(B_v[0]).T if not args.baseline: Q = np.eye(args.g_dim) else: Q = np.eye(g_goal.shape[-1]) if args.env == 'Rope': R_factor = 0.01 elif args.env == 'Soft': R_factor = 0.001 elif args.env == 'Swim': R_factor = 0.0001 else: assert False R = np.eye(args.action_dim) * R_factor ''' generate action ''' rel_attrs_np = to_np(rel_attrs)[0] assert args.optim_type == 'qp' if step % args.feedback == 0: node_attrs = attrs_gt[0] if args.env in ['Soft', 'Swim'] else None u = mpc_qp(g_cur, g_goal, step, T, rel_attrs_np, A_t, B_t, Q, R, node_attrs=node_attrs, actions=to_np(actions[step:]), gt_info=[ param, states_gt[goal_step:goal_step + 1], attrs[step:step + T], rel_attrs[step:step + T] ]) else: u = u[1:] pass ''' execute action ''' 
engine.set_action(u[0]) # execute the first action control[step] = engine.get_action() engine.step() states_roll[step + 1] = engine.get_state() ''' render ''' engine.render(states_roll, control, param, act_scale=args.act_scale, video=True, image=True, path=os.path.join(args.shootf, str(roll_idx) + '.shoot'), states_gt=np.tile(states_gt[goal_step:goal_step + 1], (args.roll_step + 1, 1, 1)), count_down=True, gt_border=True) states_result = states_roll[args.roll_step] states_goal_normalized = normalize([states_goal], [stat[1]])[0] states_result_normalized = normalize([states_result], [stat[1]])[0] return norm(states_goal - states_result), (states_goal, states_result, states_goal_normalized, states_result_normalized)
from progressbar import ProgressBar from config import gen_args from socket import gethostname args = gen_args() os.system("mkdir -p " + args.shootf) log_path = os.path.join(args.shootf, 'log.txt') tee = Tee(log_path, 'w') print_args(args) print(f"Load stored dataset statistics from {args.stat_path}!") stat = load_data(args.data_names, args.stat_path) data_names = ['attrs', 'states', 'actions'] prepared_names = ['attrs', 'states', 'actions', 'rel_attrs'] data_dir = os.path.join(args.dataf, args.shoot_set) if args.shoot_set == 'extra' and gethostname().startswith('netmit'): data_dir = args.dataf + '_' + args.shoot_set ''' model ''' # build model use_gpu = torch.cuda.is_available() if not args.baseline: """ Koopman model""" model = CompositionalKoopmanOperators(args,
import tensorflow as tf import nltk import numpy as np # preprocessed data import data import data_utils # load data from pickle and npy files metadata, idx_q, idx_a = data.load_data(PATH='datasets/cornell_corpus/') (trainX, trainY), (testX, testY), (validX, validY) = data_utils.split_dataset(idx_q, idx_a) # parameters xseq_len = trainX.shape[-1] yseq_len = trainY.shape[-1] batch_size = 32 xvocab_size = len(metadata['idx2w']) yvocab_size = xvocab_size emb_dim = 1024 import seq2seq_wrapper # In[7]: model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len, yseq_len=yseq_len, xvocab_size=xvocab_size, yvocab_size=yvocab_size, ckpt_path='ckpt/cornell_corpus/',
sample_strings = ['Sous le pont Mirabeau coule la Seine.']*4 algo = 'adam' # adam, sgd dump_path = os.path.join(os.environ.get('TMP_PATH'), 'handwriting', str(np.random.randint(0, 100000000, 1)[0])) if not os.path.exists(dump_path): os.makedirs(dump_path) ######## # DATA # ######## char_dict, inv_char_dict = cPickle.load(open('char_dict.pkl', 'r')) # All the data is loaded in memory train_pt_seq, train_pt_idx, train_str_seq, train_str_idx = \ load_data('hand_training.hdf5') train_batch_gen = create_generator( True, batch_size, train_pt_seq, train_pt_idx, train_str_seq, train_str_idx, chunk=chunk) valid_pt_seq, valid_pt_idx, valid_str_seq, valid_str_idx = \ load_data('hand_training.hdf5') valid_batch_gen = create_generator( True, batch_size, valid_pt_seq, valid_pt_idx, valid_str_seq, valid_str_idx, chunk=chunk) ################## # MODEL CREATION # ################## # shape (seq, element_id, features) seq_pt = T.tensor3('input', floatX) seq_str = T.matrix('str_input', 'int32')
aref = getattr(locale, attr) locale.setlocale(aref, '') (lang, enc) = locale.getlocale(aref) if lang != None: try: locale.setlocale(aref, (lang, 'UTF-8')) except: os.environ[attr] = lang + '.UTF-8' ################### ## Main ################### force_utf8_hack() cnames = data.load_data(output_dir + cname_dict_filename) auths = data.load_data(output_dir + auth_dict_filename) cname_list = set(list_data.load_data(output_dir + cname_filename)) auth_list = set(list_data.load_data(output_dir + auth_filename)) fail_cnames = data.load_data(output_dir + fail_cname_dict_filename) fail_auths = data.load_data(output_dir + fail_auth_dict_filename) fail_cname_list = set(list_data.load_data(output_dir + fail_cname_filename)) fail_auth_list = set(list_data.load_data(output_dir + fail_auth_filename)) # data_cname_list = set() # for hostname in cnames: # # print " " + hostname # data_cname_list.update(cnames[hostname]) # print " # hostnames = %d" % (len(cnames)) # print " # cnames = %d" % (len(data_cname_list))
def train_nn( # Hyper-Parameters dim_token=100, # word embeding dimension dim_locDiff=10, # location difference dimension dim_cueType=10, # dim_ESP_label=10, dim_latent=100, lstm_layer_n=50, lstm_decoder_layer_n=50, n2=50 + 10 + 10, ydim0=3, ydim1=3, # win_size = 2, # maxTokens1 = 60, # maximum tokens in sentence 1 # n_ESP_labels = 3, n_cueTypes=4, n_vocb_words=4396, # Vocabulary size n_locDiffs=111, # Location difference size end_idx=3194, patience=10, # Number of epoch to wait before early stop if no progress max_epochs=100, # The maximum number of epoch to run # dispFreq=10, # Display to stdout the training progress every N updates # decay_c=0., # Weight decay for the classifier applied to the U weights. lrate=0.01, # Learning rate for sgd (not used for adadelta and rmsprop) dropout_p=1.0, optimizer=momentum, # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). # maxlen=1000, # Sequence longer then this get ignored batch_size=10, # The batch size during training. inter_cost_margin=0.001, # Parameter for extra option # noise_std=0., # use_dropout=True, # if False slightly faster, but worst test error # This frequently need a bigger model. # reload_model=None, # Path to a saved model we want to start from. # test_size=-1 ): # Model options model_options = locals().copy() print('-------------------------------------------------------------') print("model options", model_options) print('-------------------------------------------------------------') # load_data, prepare_data = get_dataset(dataset) print('Loading data ... ... ...') train, valid, test = data.load_data(path='mydata.pkl', n_words=n_vocb_words) print('Building model ... ... ...') params_all = init_params(model_options, Wemb_value=data.read_gz_file("word_emb.pkl")) # tparams = init_tparams(params) tparams_d = init_tparams(params_all[0]) tparams_g = init_tparams(params_all[1]) tparams_c = OrderedDict() for kk, pp in tparams_d.items(): tparams_c[kk] = tparams_d[kk] for kk, pp in tparams_g.items(): tparams_c[kk] = tparams_g[kk] (x, masks, x_d_y_fake, y, x_noises, x_maxlens, f_D_pred_prob, f_D_pred, f_G_produce, dropouts, d_cost, g_cost) = Build_Model([tparams_d, tparams_g], model_options) d_grads = tensor.grad(d_cost, wrt=list(tparams_d.values())) # print(tparams_c) g_grads = tensor.grad(g_cost, wrt=list(tparams_c.values()), consider_constant=list(tparams_d.values()), disconnected_inputs='ignore') lr = tensor.scalar(name='lr') # f_grad_shared, f_update = optimizer(lr, tparams, grads, x, masks, y, cost) f_D_grad_shared, f_D_update = optimizer(lr, tparams_d, d_grads, x + dropouts, masks, x_d_y_fake + y, d_cost) # f_G_grad_shared, f_G_update = optimizer(lr, tparams_c, g_grads, # x_noise + x_maxlen + x_d_ps + dropouts_g, [], x_g_y_fake + yg, g_cost) f_G_grad_shared, f_G_update = optimizer(lr, tparams_c, g_grads, x + x_noises + x_maxlens, masks, y, g_cost) print('training ... ... 
...') kf_valid = get_minibatches_idx(len(valid[0]), batch_size) kf_test = get_minibatches_idx(len(test[0]), batch_size) print("%d train examples" % len(train[0])) print("%d valid examples" % len(valid[0])) print("%d test examples" % len(test[0])) # history_errs = [] best_p = None bad_counter = 0 stop_counter = 0 # if validFreq == -1: # validFreq = len(train[0]) // batch_size # if saveFreq == -1: # saveFreq = len(train[0]) // batch_size # last_training_sum_costs = numpy.inf last_ave_of_g_costs = numpy.inf last_ave_of_d_costs = numpy.inf g_costs_list = [] d_costs_list = [] uidx = 0 # the number of update done estop = False # early stop # start_time = time.time() try: for eidx in range(max_epochs): n_samples = 0 # Get new shuffled index for the training set. kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) # kf = get_minibatches_idx(99, batch_size, shuffle=True) # training_sum_costs = 0 # ave_of_g_costs_sum = 0 # ave_of_d_costs_sum = 0 for train_batch_idx, train_index in kf: # uidx += 1 # use_noise.set_value(1.) cur_batch_size = len(train_index) # Select the random examples for this minibatch x_0 = [train[0][t] for t in train_index] x_1 = [train[1][t] for t in train_index] x_3 = [train[2][t] for t in train_index] y_0 = [train[3][t] for t in train_index] y_1 = [train[4][t] for t in train_index] y_one_out = [train[5][t] for t in train_index] x_0, mask_0, maxlen_0 = data.prepare_data(x_0) x_1, mask_1, maxlen_1 = data.prepare_data(x_1) x_3, mask_3, maxlen_3 = data.prepare_data(x_3, addIdxNum=2) y_0 = numpy.asarray(y_0, dtype='int32') y_1 = numpy.asarray(y_1, dtype='int32') y_one_out = numpy.asarray(y_one_out, dtype='int32') rng = numpy.random.RandomState(9998) x0_noise_0 = rng.normal(scale=0.01, size=(cur_batch_size, dim_latent)).astype(config.floatX) x1_noise_1 = rng.normal(scale=0.01, size=(cur_batch_size, dim_latent)).astype(config.floatX) x3_noise_3 = rng.normal(scale=0.01, size=(cur_batch_size, dim_latent)).astype(config.floatX) generated_xs = f_G_produce(x0_noise_0, x1_noise_1, x3_noise_3, maxlen_0, maxlen_1, maxlen_3, y_0, y_1) # numpy.asarray([3] * cur_batch_size, dtype='int32')# generated_x_0 = generated_xs[0] generated_x_1 = generated_xs[1] generated_x_3 = numpy.concatenate( ( # numpy.random.randint(0, n_cueTypes, (cur_batch_size,)).astype('int32')[None,:], # numpy.random.randint(0, n_locDiffs, (cur_batch_size,)).astype('int32')[None,:], x_3[0:2, :], generated_xs[2]), axis=0) generated_m_0 = generated_xs[3] generated_m_1 = generated_xs[4] generated_m_3 = generated_xs[5] generated_y_0 = numpy.random.randint(0, ydim0 - 1, (cur_batch_size,)).astype('int32') generated_y_1 = numpy.random.randint(0, ydim1, (cur_batch_size,)).astype('int32') x_d_0 = numpy.concatenate((x_0, generated_x_0), axis=1) x_d_1 = numpy.concatenate((x_1, generated_x_1), axis=1) x_d_3 = numpy.concatenate((x_3, generated_x_3), axis=1) y_d_0_fake = numpy.asarray([1] * cur_batch_size + [0] * cur_batch_size, dtype='int32') y_d_1_fake = numpy.asarray([1] * cur_batch_size + [0] * cur_batch_size, dtype='int32') y_d_3_fake = numpy.asarray([1] * cur_batch_size + [0] * cur_batch_size, dtype='int32') # mask_ones_0 = numpy.ones_like(mask_0) # mask_ones_1 = numpy.ones_like(mask_1) # mask_ones_3 = numpy.ones_like(mask_3) mask_d_0 = numpy.concatenate((mask_0, generated_m_0), axis=1) mask_d_1 = numpy.concatenate((mask_1, generated_m_1), axis=1) mask_d_3 = numpy.concatenate((mask_3, generated_m_3), axis=1) y_d_0 = numpy.concatenate((y_0, generated_y_0), axis=0) y_d_1 = numpy.concatenate((y_1, generated_y_1), axis=0) 
d_cost = f_D_grad_shared(x_d_0, x_d_1, x_d_3, dropout_p, 1.0, mask_d_0, mask_d_1, mask_d_3, y_d_0_fake, y_d_1_fake, y_d_3_fake, y_d_0, y_d_1) g_cost = f_G_grad_shared(x_0, x_1, x_3, x0_noise_0, x1_noise_1, x3_noise_3, 16, 16, 12, mask_0, mask_1, mask_3, generated_y_0, generated_y_1) # print(y_g_0.shape) print('\rd_cost = %f g_cost = %f @ %d' % (d_cost, g_cost, train_batch_idx), end='') # print(cur_batch_size) # ave_of_g_costs_sum += g_cost # ave_of_d_costs_sum += d_cost g_costs_list.append(g_cost) d_costs_list.append(d_cost) if d_cost < g_cost * 0.8: for i in range(10): f_G_update(0.01) g_cost = f_G_grad_shared(x_0, x_1, x_3, x0_noise_0, x1_noise_1, x3_noise_3, 16, 16, 12, mask_0, mask_1, mask_3, generated_y_0, generated_y_1) if d_cost / g_cost >= 0.8 and d_cost / g_cost <= 1.0 / 0.8: break elif g_cost < d_cost * 0.8: for i in range(10): f_D_update(0.01) d_cost = f_D_grad_shared(x_d_0, x_d_1, x_d_3, dropout_p, 1.0, mask_d_0, mask_d_1, mask_d_3, y_d_0_fake, y_d_1_fake, y_d_3_fake, y_d_0, y_d_1) if g_cost / d_cost >= 0.8 and g_cost / d_cost <= 1.0 / 0.8: break else: f_D_update(0.01) f_G_update(0.01) if train_batch_idx % 100 == 0 or train_batch_idx == len(kf) - 1: print("---Now %d/%d training bacthes @ epoch = %d" % (train_batch_idx, len(kf), eidx)) if train_batch_idx > 0 and \ (train_batch_idx % 500 == 0 or train_batch_idx == len(kf) - 1): cur_ave_of_d_costs = sum(d_costs_list) / len(d_costs_list) cur_ave_of_g_costs = sum(g_costs_list) / len(g_costs_list) print('ave_of_d_costs_sum = %f\tave_of_g_costs_sum = %f' % (cur_ave_of_d_costs, cur_ave_of_g_costs)) # print('outputing predicted labels of test set ... ... ...') output_pred_labels(model_options, f_D_pred, f_D_pred_prob, data.prepare_data, test, kf_test, verbose=False, path="test_pred_labels.txt") if cur_ave_of_d_costs >= last_ave_of_d_costs * 0.99 and \ cur_ave_of_g_costs >= last_ave_of_g_costs * 0.99: stop_counter += 1 last_ave_of_d_costs = cur_ave_of_d_costs last_ave_of_g_costs = cur_ave_of_g_costs print('counter for early stopping : %d/%d' % (stop_counter, patience)) del d_costs_list[:] del g_costs_list[:] if stop_counter >= patience: print('Early Stop!') estop = True break # end for if stop_counter >= patience: print('Early Stop!') estop = True break if estop: break except KeyboardInterrupt: print("Training interupted")
def iou(y_true, y_pred):
    def f(y_true, y_pred):
        intersection = (y_true * y_pred).sum()
        union = y_true.sum() + y_pred.sum() - intersection
        x = (intersection + 1e-15) / (union + 1e-15)
        x = x.astype(np.float32)
        return x
    return tf.numpy_function(f, [y_true, y_pred], tf.float32)

if __name__ == "__main__":
    ## Dataset
    path = "CVC-612/"
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(path)

    ## Hyperparameters
    batch = 8
    lr = 1e-4
    epochs = 20

    train_dataset = tf_dataset(train_x, train_y, batch=batch)
    valid_dataset = tf_dataset(valid_x, valid_y, batch=batch)

    model = build_model()

    opt = tf.keras.optimizers.Adam(lr)
    metrics = [
        "acc",
        tf.keras.metrics.Recall(),
def iter_premise_subtrees():
    for f in glob.glob('../../data/model/train_premise_subtrees_0*'):
        trees = load_data(f)
        print(f)
        for t in trees:
            yield t
from collections import defaultdict import dash from dash.dependencies import Output, Input, State import dash_core_components as dcc import dash_html_components as html import plotly.graph_objs as go import pandas as pd from flask_caching import Cache from data import load_data, get_config, get_countires_mapping from utils import CircleMarkerSizer, options, get_axis # Loading data data = load_data() # Options DATA_CHOICES = sorted(list(data.keys())) DEFAULT_X_AXIS = "Life expectancy" DEFAULT_Y_AXIS = "GDP per capita" COUNTIRES_MAPPING = get_countires_mapping() SUPPORTED_COUNTRIES = COUNTIRES_MAPPING.keys() init_config = get_config( data, DEFAULT_X_AXIS, DEFAULT_Y_AXIS, supported_countries=SUPPORTED_COUNTRIES ) # Creating app app = dash.Dash(__name__) app.title = "Poor man's Gapminder"
import numpy as np from groups_fct import match, mean_notes from data import load_data users_id = [10, 24, 36, 50, 15] notes = [[4,2,3,0,4],[0,0,3,5,4],[1,5,3,1,1],[4,0,3,5,4],[0,5,5,5,0]] film_notes = [[4,2,3,0,4],[0,0,3,5,4],[1,5,3,1,1],[4,0,3,5,4],[0,5,5,5,0]] nb_films = 5 nb_users = 5 groups = [] group_notes = [] eps = 4 df = load_data() for i, value in enumerate(users_id) : match_result = np.array([]) if (i==0) : groups.append([i]) group_notes.append(notes[i]) else : print ("notes : ", notes[i]) for j in range (len(groups)): ind, result = match(notes[i], group_notes[j]) match_result = np.append(match_result,result)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) creative_model = keyedvectors.KeyedVectors.load_word2vec_format( "checkpoints/creative_model.w2v", binary=True) ad_model = keyedvectors.KeyedVectors.load_word2vec_format( "checkpoints/ad_model.w2v", binary=True) product_model = keyedvectors.KeyedVectors.load_word2vec_format( "checkpoints/product_model.w2v", binary=True) advertiser_model = keyedvectors.KeyedVectors.load_word2vec_format( "checkpoints/advertiser_model.w2v", binary=True) industry_model = keyedvectors.KeyedVectors.load_word2vec_format( "checkpoints/industry_model.w2v", binary=True) train_ad, train_click, train_user, test_ad, test_click = data.load_data() # train_record train_record = pd.merge(train_click, train_ad, on="creative_id") # test_record test_record = pd.merge(test_click, test_ad, on="creative_id") # TODO train embedding train_grouped = train_record.groupby("user_id") test_grouped = test_record.groupby("user_id") def get_embedding_from_grouped(user_id, records, column_name, keep_uid=False): if column_name == "ad_id": model = ad_model elif column_name == "creative_id": model = creative_model
def main(): val_names, data_set = load_data() iter_limit = 20 # Q2.4.1: The values of sum of within group sum of squares for k = 5, k = 10 and k = 20. print("Q 2.4.1") for k in [5, 10, 20]: init_centers = init_centers_first_k(data_set, k) centers, clusters, num_iterations = train_kmean( data_set, init_centers, iter_limit) print("k =", str(k) + ": " + str(sum_of_within_group_ss(clusters, centers))) print() # Q2.4.2: The number of iterations that k-means ran for k = 5. print("Q 2.4.2") k = 5 init_centers = init_centers_first_k(data_set, k) centers, clusters, num_iterations = train_kmean(data_set, init_centers, iter_limit) print("k =", str(k) + ", num_iter: " + str(num_iterations)) print() # Q2.4.3: A plot of the sum of within group sum of squares versus k for k = 1 - 50. # Please start your centers randomly (choose k points from the dataset at random). print("Q 2.4.3") SSK = [] min_ssk = 10000000 min_ssk_c = 0 # for k in range(1, 51): # init_centers = init_centers_random(data_set, k) # centers, clusters, num_iterations = train_kmean(data_set, init_centers, iter_limit) # ssk = sum_of_within_group_ss(clusters, centers) # print (str(k) + ", " + str(ssk)) # SSK.append(ssk) # if ssk < min_ssk: # min_ssk = ssk # min_ssk_c = k # plt.plot([i+1 for i in range(50)], SSK, color='r') # plt.show() print("Q 2.4.4") print("The best k is ", str(min_ssk_c), "because it has the minimun ssk = ", str(min_ssk)) print("Q 2.4.5") init_centers = init_centers_random(data_set, 50) centers, clusters, num_iterations = train_kmean(data_set, init_centers, iter_limit) data_set_veg = vect_avg(data_set, 50) distances = [] for i in range(50): dis = dist(centers[i], data_set_veg) print("the distance between ", str(i + 1), "th center and avg of all countries is ", str(dis)) distances.append(dis) plt.plot(distances) plt.show() plt.plot(distances, color='r') plt.ylim((0, 10)) plt.show() print("Q 2.4.6") for i in range(len(clusters)): for j in range(len(clusters[i])): if clusters[i][j]['country'] == 'China': print("The country belongs to ", str(i), "the center") print("The set contain: ") for k in range(len(clusters[i])): if k != j: print(clusters[i][k]['country']) sys.exit(0)
import matplotlib.pyplot as plt

from data import load_data, train_generator, validation_generator


def augmented_data_distribution():
    batch_size = 1000  # len(train_samples)
    batch_count = 1
    epoch = 40
    non_zero_bias = 1 / (1 + epoch / 5.)
    data = train_generator(train_samples, batch_size, non_zero_bias)
    for i in range(batch_count):
        batch_images, batch_steering = next(data)
        plt.hist(batch_steering, bins=100)
        plt.show()


def raw_data_distribution():
    batch_size = 1000  # len(train_samples)
    batch_count = 1
    data = validation_generator(train_samples, batch_size=batch_size)
    for i in range(batch_count):
        batch_images, batch_steering = next(data)
        plt.hist(batch_steering, bins=100)
        plt.show()


train_samples, validation_samples = load_data()
#raw_data_distribution()
augmented_data_distribution()
import resnet from keras.callbacks import ReduceLROnPlateau, EarlyStopping, CSVLogger, TensorBoard import numpy as np import data from keras.utils import np_utils from cfg import config lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6) early_stopper = EarlyStopping(min_delta=0.001, patience=10) csv_logger = CSVLogger(config.csvlogger_path) logging = TensorBoard(config.logs_path) x_train, y_train, x_val, y_val = data.load_data(config.train_path, radio=config.radio) x_train, y_train, x_val, y_val = data.normalize(x_train, y_train, x_val, y_val) model = resnet.ResnetBuilder.build_resnet_50( (config.img_channels, config.img_rows, config.img_cols), config.nb_classes) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) if not config.data_augmentation: print('Not using data augmentation.') model.fit(x_train, y_train, batch_size=config.batch_size, nb_epoch=config.nb_epoch, validation_data=(x_val, y_val),
from data import load_data
from classify import classify_fills
import json

fills = [f for f in classify_fills(load_data()) if f.kind != 'native']
for fill in fills:
    print(json.dumps(fill.__dict__))
Y_test[i] = x[1] if i % 1000 == 0: print('i: {}/{}'.format(i, len(test_loader))) Y_training = Y_training.type('torch.LongTensor') Y_test = Y_test.type('torch.LongTensor') dat = {'X_train': X_training, 'Y_train': Y_training, 'X_test': X_test, 'Y_test': Y_test, 'nc': nc} ''' #adsfasdfs dat = data.load_data(args.dataset, args.dataroot, args.batchSize, device=device, imgsize=args.imageSize, Ntrain=args.Ntrain, Ntest=args.Ntest) #dat = data.load_data(args.dataset, args.dataroot, args.batchSize, # device=device, imgsize=args.imageSize, Ntrain=args.Ntrain, Ntest=args.Ntest) #### defining generator #netG = nets.Generator(args.imageSize, args.nz, args.ngf, dat['nc']).to(device) #netG = nets.Generator(args.imageSize, args.nz, args.ngf, dat['nc']).to(device) netG = nets.Generator2(args.imageSize, args.nz, args.ngf, dat['nc']).to(device) #netG = nets.Generator(args.imageSize, args.nz, args.ngf, dat['nc']).to(device) netG2 = nets.Generator(args.imageSize, args.nz, args.ngf, dat['nc']).to(device)
                    default=0, help="0:only embedding 1:only feature 2:both")
parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--gpu_id', type=int, default=3)
args = parser.parse_args()
pprint(vars(args))
timer = utils.timer(name='main').tic()

os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
data_path = args.datadir + args.data

# *******************************
# results drop noticeably once the warm features are used
# has_warm_feature = False
# *******************************
data = data.load_data(args)
warm_embeddings = data['warm_embeddings']
cold_features = data['cold_features']
warm_features = data['warm_features']
timer.toc('loaded data')

# build model
classifier = model.Edge_classifier(cold_feature_dim=cold_features.shape[-1],
                                   warm_feature_dim=warm_features.shape[-1],
                                   embed_dim=warm_embeddings.shape[-1],
                                   type=args.type, lr=args.lr,
                                   n_layers=args.n_layers, hid_dim=args.hid_dim,
                                   dropout=args.dropout)
classifier.build_model()
else: filename = plnode_dir + deploy_node_filename ####### # filename = plnode_dir + deploy_node_filename ####### nodes = list_data.load_data(filename) ################### ## get data ################### if DEBUG2: print "Get Data" cnames = data.load_data(cname_dir + cname_dict_filename) auths = data.load_data(cname_dir + auth_dict_filename) cname_list = set(list_data.load_data(cname_dir + cname_filename)) auth_list = set(list_data.load_data(cname_dir + auth_filename)) fail_cnames = data.load_data(cname_dir + fail_cname_dict_filename) fail_auths = data.load_data(cname_dir + fail_auth_dict_filename) fail_cname_list = set(list_data.load_data(cname_dir + fail_cname_filename)) fail_auth_list = set(list_data.load_data(cname_dir + fail_auth_filename)) ################### ## find remain hostnames ################### if DEBUG2: print "Find Remain Hostnames" hostnames = set(list_data.load_data(hostname_dir + hostname_filename))
import tensorflow import data import network import matplotlib.pyplot from tensorflow.keras import backend as K from tensorflow.keras.models import load_model from metrics import dice from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint import time epochs = 10 size_train = 128 batch_size = 4 step_per_epochs = size_train // batch_size data_train, data_test, label_train, label_test = data.load_data() train_sequence = data.MouseBrainSequence((data_train, label_train), size=size_train, batch_size=batch_size) data_test, label_test = data.fill_augment(data_test, label_test, batch_size) model = network.unet_model_3d((128, 128, 128, 1), depth=2) model.summary() model_file = "models/unet3d-{}".format(int(time.time())) checkpoint = ModelCheckpoint("{}.h5".format(model_file), save_best_only=True) csv_logger = CSVLogger("{}-history.csv".format(model_file), append=False, separator=" ") model_history = model.fit(
## only valid DNS: # auths = list_data.load_data(cname_dir + auth_filename) auths = list_data.load_data(dns_dir + dns_filename) ################################################### if DEBUG3: print " #cnames=%d" % (len(cnames)) if DEBUG3: print " #auth dns=%d" % (len(auths)) ################### ## read done IPs ################### if DEBUG2: print "Read Done IPs" ips = data.load_data(ips_dir + ip_dict_filename) IF_DATA_READ = 1 if DEBUG3: print " #cnames=%d" % (len(ips)) ################### ## find remain CNAMEs ################### # if DEBUG2: print "Find Remain CNAMEs" # done_cnames = ips.keys() # remain_cnames = list(set(cnames) - set(done_cnames)) # cnames = remain_cnames
def data():
    data = load_data()
    return jsonify(data)
print("Using the GPU") device = torch.device("cuda") else: print("WARNING: You are about to run on cpu, and this will likely run out \ of memory. \n You can try setting batch_size=1 to reduce memory usage") device = torch.device("cpu") ############################################################################### # # DATA LOADING & PROCESSING # ############################################################################### print('Training on ' + args.train_data_path + '\nTesting on ' + args.test_data_path) train_data = load_data(args.train_data_path) test_data = load_data(args.test_data_path) if args.debug: print('Nevermind, actually debugging with 10 t/v simple pendulums') args.num_train = 100 args.num_test = 100 args.seq_len = 5000 args.vocab_size = 100 from data import get_simple_pendulums_digitize train_data = get_simple_pendulums_digitize(args.num_train, args.seq_len, args.vocab_size) testid_data = get_simple_pendulums_digitize(args.num_test, args.seq_len, args.vocab_size)
def predict(mean=0.0, std=1.0): # load and normalize data if mean == 0.0 and std == 1.0: imgs_train, _, _ = load_data(train_images_path, num_classes) mean = np.mean(imgs_train) std = np.std(imgs_train) imgs_test, imgs_mask_test, names_test = load_data(test_images_path, num_classes) mean = np.mean(imgs_test) std = np.std(imgs_test) original_imgs_test = imgs_test.astype(np.uint8) imgs_test -= mean imgs_test /= std # load model with weights #model = unet(num_classes) #Unet model #model = get_frontend(imSize,imSize, num_classes) #Dilation model model = get_dilation_model_unet(imSize, imSize, num_classes) #combination model model.load_weights(weights_path) # make predictions imgs_mask_pred = model.predict(imgs_test, verbose=1) # save to mat file for further processing if not os.path.exists(predictions_path): os.mkdir(predictions_path) matdict = { 'pred': imgs_mask_pred, 'image': original_imgs_test, 'mask': imgs_mask_test, 'name': names_test } savemat(os.path.join(predictions_path, 'predictions.mat'), matdict) # save images with segmentation and ground truth mask overlay for i in range(len(imgs_test)): pred = imgs_mask_pred[i] #print(original_imgs_test.shape) image = original_imgs_test[i] mask = imgs_mask_test[i] # segmentation mask is for the middle slice image_rgb = gray2rgb(image[:, :, 0]) # prediction contour image (add all the predictions) pred = (np.round(pred) * 255.0).astype(np.uint8) # ground truth contour image (add all the masks) mask = (np.round(mask) * 255.0).astype(np.uint8) # combine image with contours using red for pred and blue for mask pred_rgb = np.array(image_rgb) annotation = pred_rgb[:, :, 1] #Set all the pixels with the annotation to zero and fill it in with the color for c in range(num_classes): pred_temp = pred[:, :, c] mask_temp = mask[:, :, c] pred_temp, contours, _ = cv2.findContours(pred_temp.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) pred_temp = np.zeros(pred_temp.shape) cv2.drawContours(pred_temp, contours, -1, (255, 0, 0), 1) mask_temp, contours, _ = cv2.findContours(mask_temp.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) mask_temp = np.zeros(mask_temp.shape) cv2.drawContours(mask_temp, contours, -1, (255, 0, 0), 1) pred[:, :, c] = pred_temp mask[:, :, c] = mask_temp annotation[np.maximum(pred[:, :, c], mask[:, :, c]) == 255] = 0 pred_rgb[:, :, 0] = pred_rgb[:, :, 1] = pred_rgb[:, :, 2] = annotation for c in range(num_classes): pred_rgb[:, :, 2] = np.maximum(pred_rgb[:, :, 2], mask[:, :, c]) pred_rgb[:, :, 1] = np.maximum( pred_rgb[:, :, 1], (pred[:, :, c] / 255) * class_colors[c][1]) pred_rgb[:, :, 2] = np.maximum( pred_rgb[:, :, 2], (pred[:, :, c] / 255) * class_colors[c][2]) pred_rgb[:, :, 0] = np.maximum( pred_rgb[:, :, 0], (pred[:, :, c] / 255) * class_colors[c][0]) imsave(os.path.join(predictions_path, names_test[i] + '.png'), pred_rgb) return imgs_mask_test, imgs_mask_pred, names_test
################### ## get PlanetLab nodes states ################### if DEBUG2: print "Get PlanetLab Nodes" nodes = list_data.load_data(plnode_dir + deploy_node_filename) if DEBUG3: print " %d nodes" % (len(nodes)) ################### ## get data ################### if DEBUG2: print "Get Data" cnames = data.load_data(cname_dir + cname_dict_filename) auths = data.load_data(cname_dir + auth_dict_filename) cname_list = set(list_data.load_data(cname_dir + cname_filename)) auth_list = set(list_data.load_data(cname_dir + auth_filename)) fail_cnames = data.load_data(cname_dir + fail_cname_dict_filename) fail_auths = data.load_data(cname_dir + fail_auth_dict_filename) fail_cname_list = set(list_data.load_data(cname_dir + fail_cname_filename)) fail_auth_list = set(list_data.load_data(cname_dir + fail_auth_filename)) IF_HOST_READ = 1 ################### ## Check Remote Status -- Prepare Tmp Directory ################### if DEBUG2: print "Prepare Tmp Directory"
print_steps = 100
train_nums = 30000
buffer_size = 1000
regular_rate = 0.0005
batch_size = 128
channels = 1
resume = True  # resume training an existing model?
'''
losslist = []
accuracy = []
'''
#os.environ['CUDA_VISIBLE_DEVICES']=gpunum
label_batch, image_batch = data.load_data(tr.train_dir, buffer_size, batch_size, channels)
regularizer = None  #tf.contrib.layers.l2_regularizer(regular_rate)
logits, tt = tr.process(image_batch, train=True, regularizer=regularizer, channels=channels)
global_step = tf.Variable(0, trainable=False)
prob_batch = tf.nn.softmax(logits)
accuracy_top1_batch = tf.reduce_mean(
    tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 1), tf.float32))
accuracy_top5_batch = tf.reduce_mean(
    tf.cast(tf.nn.in_top_k(prob_batch, label_batch, 5), tf.float32))
accuracy_top10_batch = tf.reduce_mean(
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.advanced_activations import PReLU
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD, Adadelta, Adagrad
from keras.utils import np_utils, generic_utils
from six.moves import range
from data import load_data
import random

# load the data
data, label = load_data()

# shuffle the data
index = [i for i in range(len(data))]
random.shuffle(index)
data = data[index]
label = label[index]
print(data.shape[0], ' samples')

# the labels are the 10 classes 0-9; Keras expects binary class matrices,
# so convert them with the helper function Keras provides
label = np_utils.to_categorical(label, 10)

###############
# start building the CNN model
###############
# create a model
import matplotlib.cm as cm #load the saved model model = cPickle.load(open("model.pkl", "rb")) #define theano funtion to get output of FC layer get_feature = theano.function([model.layers[0].input], model.layers[11].get_output(train=False), allow_input_downcast=False) #define theano funtion to get output of first Conv layer get_featuremap = theano.function([model.layers[0].input], model.layers[2].get_output(train=False), allow_input_downcast=False) data, label = load_data() # visualize feature of Fully Connected layer #data[0:10] contains 10 images feature = get_feature(data[0:10]) #visualize these images's FC-layer feature plt.imshow(feature, cmap=cm.Greys_r) plt.show() #visualize feature map of Convolution Layer num_fmap = 4 #number of feature map for i in range(num_fmap): featuremap = get_featuremap(data[0:10]) plt.imshow(featuremap[0][i], cmap=cm.Greys_r) #visualize the first image's 4 feature map plt.show()