def _balanced_batches(dataset, aug, shuffle):
    """Draw one batch per category and concatenate along the batch axis.

    Category splits are expected at '<dataset stem>_<g>.txt' for
    g in [0, hyp.nCats) -- TODO confirm this naming convention against
    how the per-category files are generated.

    Returns a (genes, cats) pair of tensors with leading dimension
    hyp.B * hyp.nCats.
    """
    genes = []
    cats = []
    for g in range(hyp.nCats):
        # Strip the '.txt' extension and append the category index.
        gene, cat = batcher.batch('%s_%d.txt' % (dataset[:-4], g),
                                  hyp.B, aug=aug, shuffle=shuffle)
        genes.append(gene)
        cats.append(cat)
    return tf.concat(genes, axis=0), tf.concat(cats, axis=0)


def get_inputs(model):
    """Attach train/val input tensors to *model* and return them.

    Populates model.gene_t / model.cat_t (train) and model.gene_v /
    model.cat_v (val), either as plain batches or — when
    hyp.do_batch_balance is set — as one batch per category
    concatenated together so every category is equally represented.

    Returns:
        (model.train_inputs, model.val_inputs), each a [genes, cats] list.
    """
    # Placeholder tensors; both branches below overwrite these.
    model.gene_t = tf.ones((hyp.B, hyp.N, 1))
    model.gene_v = tf.ones((hyp.B, hyp.N, 1))
    model.cat_t = tf.ones((hyp.B), tf.int64)
    model.cat_v = tf.ones((hyp.B), tf.int64)
    if not hyp.do_batch_balance:
        # Plain batching straight from the full dataset files.
        model.gene_t, model.cat_t = batcher.batch(
            hyp.dataset_t, hyp.B, aug=hyp.aug_train, shuffle=hyp.shuffle_train)
        model.gene_v, model.cat_v = batcher.batch(
            hyp.dataset_v, hyp.B, aug=hyp.aug_val, shuffle=hyp.shuffle_val)
    else:
        # Category-balanced batching: one sub-batch per category file.
        model.gene_t, model.cat_t = _balanced_batches(
            hyp.dataset_t, hyp.aug_train, hyp.shuffle_train)
        model.gene_v, model.cat_v = _balanced_batches(
            hyp.dataset_v, hyp.aug_val, hyp.shuffle_val)
    model.train_inputs = [
        model.gene_t,
        model.cat_t,
    ]
    model.val_inputs = [
        model.gene_v,
        model.cat_v,
    ]
    return model.train_inputs, model.val_inputs
def bag_of_words_vectorizer(datafile, k_features):
    """Yield (X, Y) training batches built from a TF-IDF term-document matrix.

    Reads JSON-lines from *datafile* (one entity per line), keeps only
    English-language entries, vectorizes their text with TF-IDF, selects
    the *k_features* best features by chi2 against the labels, and yields
    batches of config.BATCH_SIZE dense vectors with one-hot binary labels
    suitable for a Keras network.

    Args:
        datafile: path to a JSON-lines file with "lang", "text", "label" keys.
        k_features: number of features to keep via SelectKBest(chi2).

    Yields:
        (X, Y): X is a dense (batch, k_features) ndarray, Y the
        corresponding one-hot label matrix of shape (batch, 2).
    """
    data = []
    labels = []
    # BUG FIX: the original ignored the `datafile` parameter and opened a
    # hard-coded "data.json"; it also never closed the file handle.
    with open(datafile, errors="ignore") as f:
        for jsoned_entity in f:
            entity = json.loads(jsoned_entity)
            if entity["lang"] == "en":
                data.append(entity["text"])
                labels.append(entity["label"])
    vectorizer = TfidfVectorizer(stop_words=get_stop_words("english"))
    data = vectorizer.fit_transform(data)
    # Keep only the k features most correlated with the labels.
    data = SelectKBest(chi2, k=k_features).fit_transform(data, labels)
    for vector_label_batch in batch(zip(data, labels), config.BATCH_SIZE):
        vectors = []
        batch_labels = []  # renamed from `labels` to avoid shadowing the full list
        for vec, label in vector_label_batch:
            vectors.append(vec.toarray())  # densify one sparse row at a time
            batch_labels.append(label)
        X = np.vstack(vectors)
        Y = np_utils.to_categorical(batch_labels, 2)  # one-hot, 2 classes
        yield X, Y
def run(filename):
    """Execute the test configuration in *filename* end to end.

    Parses the input file, runs the resulting test batch, packs the
    results, and returns the output directory produced by the packer.
    """
    tests, vehicle, tracks, model, out = input_processing.process_input(
        filename)
    # out[1] != 0 selects a non-default batching mode -- presumably a
    # sweep/plot flag; confirm against batcher.batch's signature.
    use_alt_mode = out[1] != 0
    results = batcher.batch(tests, vehicle, tracks, model, use_alt_mode)
    return packer.pack(results, out[0])
# Handle one web-submitted batch request: resolve vehicle/track configs from
# the database, run the batch, pack results, and prepare the display
# directory/link. Relies on request-scoped names defined outside this view
# (bcText, bcName, unique_id) and an already-open DB cursor `cur`.
try:
    conf = input_processing.process_web_config(bcText)
    # Resolve the vehicle name to its stored config text (parameterized query).
    cur.execute("SELECT VehicleText FROM vehicle_config WHERE Name = %s",
                [conf.vehicle])
    conf.vehicle = cur.fetchall()[0][0]
    # Resolve each track name to its on-disk path.
    for i, track in enumerate(conf.tracks):
        logging.info(track.__dict__)
        cur.execute("SELECT Path FROM track_config WHERE Name = %s",
                    [track.file])
        conf.tracks[i].path = cur.fetchone()[0]
    tests, vehicle, tracks, model, out = input_processing.process_web_input(conf)
    logging.info('batching...')
    results = batcher.batch(tests, vehicle, tracks, model, out[1] != 0)
    # Reconnect after the (potentially long) batch run -- presumably the
    # original connection can time out; confirm. NOTE(review): credentials
    # are hard-coded here; consider moving them to config.
    db = sql.connect("localhost", "rlapp", "gottagofast", "roselap")
    cur = db.cursor()
    logging.info('packing...')
    result_path = packer.pack(results, out[0])
    # Create the per-request graph directory and the dashboard URL for it.
    display_dir = config.file_dir + "/graph/" + unique_id
    os.makedirs(display_dir)
    display_link = (config.web_dir + "/graph/" + unique_id + "/" + bcName +
                    "-dashboard.php")
    logging.info('done batching!')
except Exception:
    # Top-level boundary: log the full traceback rather than crash the worker.
    err = traceback.format_exc()
    logging.exception(err)