Esempio n. 1
0
def get_inputs(model):
    """Attach training and validation input tensors to ``model`` and return them.

    Sets ``model.gene_t`` / ``model.cat_t`` (training) and ``model.gene_v`` /
    ``model.cat_v`` (validation), then groups them into
    ``model.train_inputs`` and ``model.val_inputs``.

    When ``hyp.do_batch_balance`` is falsy, one batch is drawn from
    ``hyp.dataset_t`` and one from ``hyp.dataset_v``.  Otherwise, for every
    category index ``g`` in ``range(hyp.nCats)`` a batch is drawn from the
    file named by dropping the last four characters of the dataset path and
    appending ``_<g>.txt``; the per-category results are concatenated along
    axis 0.

    Args:
        model: object that receives the input tensors as attributes.

    Returns:
        Tuple ``(model.train_inputs, model.val_inputs)``, each a two-element
        list ``[gene, cat]``.
    """
    # Placeholder tensors; both branches below replace them.
    model.gene_t = tf.ones((hyp.B, hyp.N, 1))
    model.gene_v = tf.ones((hyp.B, hyp.N, 1))
    model.cat_t = tf.ones((hyp.B), tf.int64)
    model.cat_v = tf.ones((hyp.B), tf.int64)

    def _per_category_batches(dataset, aug, shuffle):
        # Collect one batch per category file, in category order.
        gene_parts = []
        cat_parts = []
        stem = dataset[:-4]
        for cat_idx in range(hyp.nCats):
            gene_part, cat_part = batcher.batch('%s_%d.txt' % (stem, cat_idx),
                                                hyp.B,
                                                aug=aug,
                                                shuffle=shuffle)
            gene_parts.append(gene_part)
            cat_parts.append(cat_part)
        return gene_parts, cat_parts

    if hyp.do_batch_balance:
        train_genes, train_cats = _per_category_batches(
            hyp.dataset_t, hyp.aug_train, hyp.shuffle_train)
        model.gene_t = tf.concat(train_genes, axis=0)
        model.cat_t = tf.concat(train_cats, axis=0)

        val_genes, val_cats = _per_category_batches(
            hyp.dataset_v, hyp.aug_val, hyp.shuffle_val)
        model.gene_v = tf.concat(val_genes, axis=0)
        model.cat_v = tf.concat(val_cats, axis=0)
    else:
        model.gene_t, model.cat_t = batcher.batch(hyp.dataset_t,
                                                  hyp.B,
                                                  aug=hyp.aug_train,
                                                  shuffle=hyp.shuffle_train)
        model.gene_v, model.cat_v = batcher.batch(hyp.dataset_v,
                                                  hyp.B,
                                                  aug=hyp.aug_val,
                                                  shuffle=hyp.shuffle_val)

    model.train_inputs = [model.gene_t, model.cat_t]
    model.val_inputs = [model.gene_v, model.cat_v]

    return model.train_inputs, model.val_inputs
def bag_of_words_vectorizer(datafile, k_features):
    """Yield batches of dense TF-IDF vectors and categorical labels.

    Reads ``datafile`` line by line, parsing each line as a JSON object, and
    keeps the ``"text"`` and ``"label"`` of entries whose ``"lang"`` is
    ``"en"``.  The texts are vectorized with ``TfidfVectorizer`` (English stop
    words removed) and reduced to the ``k_features`` best columns by a chi2
    test against the labels.  The rows are then grouped with ``batch`` into
    chunks of ``config.BATCH_SIZE``.

    Args:
        datafile: path of the file to read; one JSON object per line.
        k_features: number of features kept by ``SelectKBest``.

    Yields:
        Tuple ``(X, Y)`` where ``X`` is the batch's rows densified and stacked
        vertically, and ``Y`` is ``np_utils.to_categorical(labels, 2)`` for
        the same rows.
    """
    texts = []
    labels = []

    # Read from the caller-supplied path; the context manager closes the
    # handle as soon as every line has been parsed (before the first yield).
    with open(datafile, errors="ignore") as source:
        for jsoned_entity in source:
            entity = json.loads(jsoned_entity)
            if entity["lang"] == "en":
                texts.append(entity["text"])
                labels.append(entity["label"])

    vectorizer = TfidfVectorizer(stop_words=get_stop_words("english"))
    features = vectorizer.fit_transform(texts)
    features = SelectKBest(chi2, k=k_features).fit_transform(features, labels)

    for vector_label_batch in batch(zip(features, labels), config.BATCH_SIZE):
        # Separate names for the per-batch lists so the full label list that
        # feeds the zip above is never shadowed.
        batch_vectors = []
        batch_labels = []
        for sparse_row, label in vector_label_batch:
            batch_vectors.append(sparse_row.toarray())
            batch_labels.append(label)

        X = np.vstack(batch_vectors)
        Y = np_utils.to_categorical(batch_labels, 2)
        yield X, Y
Esempio n. 3
0
def run(filename):
    """Process an input file, run the batch, and pack the results.

    Args:
        filename: passed unchanged to ``input_processing.process_input``.

    Returns:
        Whatever ``packer.pack`` returns for the batch results and ``out[0]``.
    """
    parsed = input_processing.process_input(filename)
    tests, vehicle, tracks, model, out = parsed

    # The last argument to the batcher is True when out[1] is non-zero.
    flag = out[1] != 0
    results = batcher.batch(tests, vehicle, tracks, model, flag)

    return packer.pack(results, out[0])
Esempio n. 4
0
    # Build the run configuration from the web input, resolve vehicle and
    # track names through the database, run the batch, pack the results and
    # prepare the display directory/link.  Any exception is logged below.
    # NOTE(review): `cur`, `bcText`, `bcName` and `unique_id` are defined
    # outside this excerpt.
    try:
        conf = input_processing.process_web_config(bcText)

        # Replace the vehicle name with the VehicleText stored under that name.
        # NOTE(review): `fetchall()[0][0]` fails if no row matches; the error
        # is then handled by the except clause below.
        cur.execute("SELECT VehicleText FROM vehicle_config WHERE Name = %s", [conf.vehicle])
        conf.vehicle = cur.fetchall()[0][0]

        # Look up the stored Path for each track by its file name.
        for i, track in enumerate(conf.tracks):
            logging.info(track.__dict__)
            cur.execute("SELECT Path FROM track_config WHERE Name = %s", [track.file])
            conf.tracks[i].path = cur.fetchone()[0]
        
        tests, vehicle, tracks, model, out = input_processing.process_web_input(conf)

        logging.info('batching...')
        # The last argument is True when out[1] is non-zero.
        results = batcher.batch(tests, vehicle, tracks, model, out[1] != 0)

        # A new connection and cursor are opened after batching.
        # NOTE(review): presumably because the earlier connection may have
        # gone stale during a long batch -- confirm.  The new cursor is not
        # used again within this excerpt.
        # NOTE(review): credentials are hard-coded here; consider moving them
        # to configuration.
        db = sql.connect("localhost", "rlapp", "gottagofast", "roselap")
        cur = db.cursor()

        logging.info('packing...')
        result_path = packer.pack(results, out[0])
        
        # Create the per-run graph directory and build the dashboard link.
        # os.makedirs raises if the directory already exists.
        display_dir = config.file_dir + "/graph/" + unique_id
        os.makedirs(display_dir)
        display_link = config.web_dir + "/graph/" + unique_id + "/" + bcName + "-dashboard.php"

        # NOTE(review): this message is emitted after packing, not right
        # after batching.
        logging.info('done batching!')
    except Exception:
        # The formatted traceback is used as the log message.
        # NOTE(review): logging.exception already appends the traceback, so
        # it will appear twice in the log output.
        err = traceback.format_exc()
        logging.exception(err)