Example #1
def get_predictions(age, cp, sex, trestbps, chol, fbs, restecg, thalach, exang,
                    oldpeak, req_model, slope, ca, thal):
    data = pd.DataFrame(
        {
            'age': age,
            'sex': sex,
            'cp': cp,
            'trestbps': trestbps,
            'chol': chol,
            'fbs': fbs,
            'restecg': restecg,
            'thalach': thalach,
            'exang': exang,
            'oldpeak': oldpeak,
            'slope': slope,
            'ca': ca
        },
        index=[0])
    extract_data(data)
    vals = data.iloc[:].values

    if req_model == 'DecisionTree':
        print(req_model)
        return decisionTree.predict(vals)[0]
    elif req_model == 'LogisiticRegression':
        print(req_model)
        print("get Pred LR")
        return logisticRegression.predict(vals)[0]
    elif req_model == 'NaiveBayes':
        print(req_model)
        return naiveBayes.predict(vals)[0]
    else:
        return "Cannot Predict"
Example #2
def main():
    savepath = './save_point'
    filepath = './save_point/keras_example_checkpoint.h5'

    # Extract MNIST dataset
    train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
    train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
    test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
    test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

    train_data = extract_data(train_data_filename, 60000, dense=False)
    train_data = train_data.reshape((60000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
    train_labels = extract_labels(train_labels_filename, 60000, one_hot=True)
    test_data = extract_data(test_data_filename, 10000, dense=False)
    test_data = test_data.reshape((10000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
    test_labels = extract_labels(test_labels_filename, 10000, one_hot=True)

    validation_data = train_data[:VALIDATION_SIZE, ...]
    validation_labels = train_labels[:VALIDATION_SIZE, :]
    validation_set = (validation_data, validation_labels)
    train_data = train_data[VALIDATION_SIZE:, ...]
    train_labels = train_labels[VALIDATION_SIZE:, ...]

    # Model construction
    model = Sequential()
    model.add(Convolution2D(32, 3, 3, border_mode='same',
              input_shape=(1, 28, 28)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Convolution2D(64, 3, 3, border_mode='same'))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))

    # Define optimizer and configure training process
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"])

    model.fit(
        train_data,
        train_labels,
        nb_epoch=NUM_EPOCHS,
        batch_size=1000,
        validation_data=validation_set)

    print 'Save model weights'
    if not os.path.isdir (savepath):
        os.mkdir (savepath)
    model.save_weights(filepath, overwrite=True)

    predict = model.predict(test_data, batch_size=1000)

    print 'Test err: %.1f%%' % error_rate(predict, test_labels)

    print 'Test loss: %.4f, accuracy: %.4f' % \
        tuple(model.evaluate(test_data, test_labels, batch_size=1000))
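To reuse the checkpoint written above in a later session, the weights can be loaded back into an identically constructed model before evaluating; a brief sketch, assuming the same model-building code has been run first:

# Hypothetical reload of the saved checkpoint
model.load_weights(filepath)
loss, acc = model.evaluate(test_data, test_labels, batch_size=1000)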
Example #3
 def test_extract_data(self):
     notepad_app_hash = "TxTaPpHaSh"
     self.assertEqual(extract_data(None), None)
     self.assertEqual(extract_data(""), None)
     self.assertEqual(extract_data("bad_input"), None)
     self.assertEqual(
         extract_data("https://www.w3.org/TR/PNG/iso_8859-1.txt"),
         notepad_app_hash)
Example #4
def build_model_and_evaluate(data, target, classifier="xgb"):
    model = Model1()

    if data == "face":
        df_X = model.fetch_face_data()
    elif data == "text":
        df_X = model.fetch_text_data()
    elif data == "relation":
        df_X = model.fetch_relation_data()
    else:
        raise ValueError("Incorrect data format")

    X, y = utils.extract_data(df_X, label=target)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=2)

    if classifier == "xgb":
        clf = XGBClassifier(n_estimators=200)
    elif classifier == "svm":
        clf = SGDClassifier()
    else:
        raise ValueError("Incorrect classifier")

    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    return score
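A hypothetical invocation of the helper above; the target label "gender" is an assumption, while the data and classifier keys follow the branches in the function:

# Illustrative call (target label is assumed for the example)
acc = build_model_and_evaluate(data="text", target="gender", classifier="xgb")
print("accuracy:", acc)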
Example #5
def process_incidents(logger):
    try:
        conn, cur, dict_cur = utils.get_database_connection()
    except Exception as e:
        logger.error("Getting database connection")
        sys.exit("Unable to get database connection")

    url = utils.build_extract_url(logger)
    logger.info("Starting the extract")
    results = utils.extract_data(url, logger)
    number_results = len(results)
    logger.info(f"Extracted {number_results} records")
    load_status = utils.load_data(conn, cur, results, logger)
    logger.info(f"Load status: {load_status}")
    if load_status == 'success':
        incidents = utils.get_new_incidents(dict_cur)
        number_incidents = len(incidents)
        logger.info(f"Found {number_incidents} incidents")
        if number_results > 0:
            api = utils.get_twitter_auth()
            for incident in incidents:
                tweet_success = utils.update_status(api, incident, conn, cur)
                if tweet_success:
                    logger.info("Tweet status posted successfully")
                else:
                    logger.error("Posting tweet status")
    cur.close()
    dict_cur.close()
    conn.close()
Example #6
def invoice_template():
    from models import Invoice
    invoice_id = request.form.get('id', None)
    if invoice_id is None:
        return 'wrong parameters sent'
    invoice = Invoice.query.filter(Invoice.id == invoice_id).first()
    factor_data = extract_data(invoice)
    return render_template('invoice_template.html', **factor_data)
Example #7
 def save(self, name_att, input, data_type):
     print(name_att, input, data_type)
     data = utils.extract_data(input, data_type)
     self.chat_bot.add_att(name_att, data)
     ans = self.chat_bot.save_user()
     if ans == "NEW":
         self.set_next(self.list_answers[1][1])
     else:
         self.set_next(self.list_answers[0][1])
     return ans
Example #8
def invoice_factor():
    from models import Invoice
    invoice_id = request.form.get('id', None)
    if invoice_id is None:
        return 'wrong parameters sent'
    invoice = Invoice.query.filter(Invoice.id == invoice_id).first()
    file_path = os.path.join(files_dir, '{}.pdf'.format(invoice.number))
    static_path = os.path.join(static_dir, 'css/style.css')
    factor_data = extract_data(invoice)
    HTML(string=render_template('invoice.html', **factor_data)).write_pdf(
        target=file_path, stylesheets=[static_path])
    return send_file(file_path)
Example #9
 def check_input(self, input):
     input = utils.extract_data(input, self.data_type)
     check = utils.check_input_type(input, self.data_type)
     print("TIMEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE", input, check)
     if check:
         self.chat_bot.add_att(self.attribute, input)
     else:
         inext = self.nodej["except"]
         if inext != 0:
             self.set_next(inext)
             check = True
     return check
Example #10
 def do_where(self, my_df, attr, value, opr):
     tbl, attr = self.extract_ta(attr)
     # if tbl is None:
     #     pass
     # else:
     #     table = self.alias_map[tbl]
     if isinstance(value, list):
         return self.do_dynamic_where(my_df, attr, value[0], opr, value[2],
                                      value[1])
     elif utils.is_float(value) or utils.is_date(value) or utils.is_quoted(
             value):
         par = utils.extract_data(value)
         return self.do_fix_where(my_df, attr, par, opr)
     else:
         return self.do_dynamic_where(my_df, attr, value, opr)
Example #11
def build_model_and_evaluate_rms(data, regressor="xgb"):
    model = Model1()

    if data == "face":
        df_X = model.fetch_face_data()
    elif data == "text":
        df_X = model.fetch_text_data()
    elif data == "relation":
        df_X = model.fetch_relation_data()
    else:
        raise ValueError("Incorrect data format")

    X, y = utils.extract_data(df_X, label="personality")
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=2)

    if regressor == "xgb":
        reg = MultiOutputRegressor(
            XGBRegressor(n_estimators=200,
                         max_depth=2,
                         objective="reg:squarederror"))
    elif regressor == "rf":
        reg = MultiOutputRegressor(RandomForestRegressor(n_estimators=100))

    elif regressor == "lasso":
        reg = ""

    elif regressor == "lightgbm":
        reg = MultiOutputRegressor(
            lightgbm.LGBMRegressor(objective="regression"))
    else:
        raise ValueError("Incorrect classifier")

    reg = reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Calculating RMSE for all personality traits
    rmse = []
    for i, value in enumerate(utils.regressor_labels):
        rmse.append(sqrt(mean_squared_error(y_pred[:, i], y_test[value])))

    return rmse
Example #12
def build_model_and_evaluate(data: List[str], target: str, classifier="xgb"):
  model = Model2EarlyFusion()

  df_X = combine_features(data)

  X, y = utils.extract_data(df_X, label=target)
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=2)

  if classifier == "xgb":
    clf = XGBClassifier(n_estimators=200)
  elif classifier == "svm":
    clf = SGDClassifier()
  else:
    raise ValueError("Incorrect classifier")

  clf = clf.fit(X_train, y_train)
  y_pred = clf.predict(X_test)
  score = accuracy_score(y_test, y_pred)
  return score
Example #13
    def read(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, nb_classes=2):
        images, labels = extract_data('./data/')
        labels = np.reshape(labels, [-1])
        X_train, X_test, y_train, y_test = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        X_valid, X_test, y_valid, y_test = train_test_split(
            X_test, y_test, test_size=0.5, random_state=random.randint(0, 100))

        # TensorFlow ordering:
        assert Keras.image_dim_ordering() == 'tf'
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
        X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 3)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)
        input_shape = (img_rows, img_cols, 3)

        # The data, shuffled and split between train and test sets:
        print('X_train shape:', X_train.shape)
        print(X_train.shape[0], 'train samples')
        print(X_valid.shape[0], 'valid samples')
        print(X_test.shape[0], 'test samples')

        # Convert class vectors to binary class matrices:
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        Y_valid = np_utils.to_categorical(y_valid, nb_classes)
        Y_test = np_utils.to_categorical(y_test, nb_classes)

        X_train = X_train.astype('float32')
        X_valid = X_valid.astype('float32')
        X_test = X_test.astype('float32')

        X_train /= 255
        X_valid /= 255
        X_test /= 255

        self.X_train = X_train
        self.X_valid = X_valid
        self.X_test = X_test

        self.Y_train = Y_train
        self.Y_valid = Y_valid
        self.Y_test = Y_test
Example #14
def test_model(model, test_data_path, output_dir, output_file_name, eval_file_name):
    # Read and extract test data set
    test_data = pd.read_csv(test_data_path,
                            sep='\t',
                            names=header_name,
                            header=None,
                            usecols=[0, 1, 2]).iloc[:, 0:3]
    test_data, x_test, y_test = extract_data(test_data)
    print("Correct labels:\n", np.array(y_test), "\n")

    # Start predicting line by line and write to output file
    output_path = os.path.join(output_dir, output_file_name)
    output_file = open(output_path, "w")

    predictions = []
    num_of_correct = 0
    for index, row in test_data.iterrows():
        # Make prediction
        line_prediction, max_score = model.predict_line(row['text'])
        predictions.append(line_prediction)

        # Evaluate if the prediction is correct or not
        line_prediction = "yes" if line_prediction else "no"
        target = "yes" if row['q1_label'] else "no"
        outcome = "correct" if line_prediction == target else "wrong"

        if (outcome == 'correct'):
            num_of_correct += 1

        # Write result to file
        content = """{}  {}  {:.2E}  {}  {}\n""".format(
            row['tweet_id'], line_prediction, max_score, target, outcome)

        output_file.write(content)

    output_file.close()
    print("Predicted labels:\n", predictions)
    print("Trace file produced: ", output_path)
    # TODO: Calculate and print out precision and stats
    evaluate(y_test.tolist(), predictions,
             os.path.join(output_dir, eval_file_name), num_of_correct)
Example #15
def build_model_and_evaluate_rms(data, regressor="XGB"):
  model = Model2EarlyFusion()

  df_X = combine_features(data)

  X, y = utils.extract_data(df_X, label="personality")
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=2)

  reg = MultiOutputRegressor(XGBRegressor(n_estimators=200,
                                          max_depth=2,
                                          objective="reg:squarederror"))

  reg = reg.fit(X_train, y_train)
  y_pred = reg.predict(X_test)

  # Calculating RMSE for all personality traits
  rmse = []
  for i, value in enumerate(utils.regressor_labels):
    rmse.append(sqrt(mean_squared_error(y_pred[:, i], y_test[value])))

  return rmse
Example #16
 def __download_data(self, url, out_path):
     """download a url and save it in the out_path
     :url the: url to download the records
     :out_path: the path were the file is saved
     :return: the number records extracted
     """
     r = requests.get(url)
     log.info('processing url {}'.format(url))
     time.sleep(0.5)
     if r.status_code == 200:
         data = utils.extract_data(r.json())
         if len(data) > 0:
             log.info('saving file {}'.format(out_path))
             with open(out_path, 'w') as f:
                 json.dump(data, f)
             return len(data)
         else:
             log.error('no data for url {}'.format(url))
     else:
         log.error('got response {} for url {}'.format(r.status_code, url))
     r.close()
     return 0
Example #17
def main():
    # load config file
    config = load_config(config_path)

    # build dict for token (vocab_dict) and char (vocab_c_dict)
    vocab_dict, vocab_c_dict = build_dict(vocab_path, vocab_char_path)

    # load pre-trained embedding
    # W_init: token index * token embeding
    # embed_dim: embedding dimension
    W_init, embed_dim = load_word2vec_embedding(word_embedding_path, vocab_dict)
    
    K = 3

    # generate train/valid examples
    train_data, sen_cut_train = generate_examples(train_path, vocab_dict, vocab_c_dict, config, "train")
    dev_data, sen_cut_dev = generate_examples(valid_path, vocab_dict, vocab_c_dict, config, "dev")

    #------------------------------------------------------------------------
    # training process begins
    hidden_size = config['nhidden']
    batch_size = config['batch_size']

    coref_model = model.CorefQA(hidden_size, batch_size, K, W_init, config).to(device)

    if len(sys.argv) > 4 and str(sys.argv[4]) == "load":
        try:
            coref_model.load_state_dict(torch.load(torch_model_p))
            print("saved model loaded")
        except:
            print("no saved model")

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(coref_model.parameters(), lr=config['learning_rate']) # TODO: use hyper-params in paper

    iter_index = 0
    batch_acc_list = []
    batch_loss_list = []
    dev_acc_list = []

    max_iter = int(config['num_epochs'] * len(train_data) / batch_size)
    print("max iteration number: " + str(max_iter))

    while True:
        # building batch data
        # batch_xxx_data is a list of batch data (len 15)
        # [dw, m_dw, qw, m_qw, dc, m_dc, qc, m_qc, cd, m_cd, a, dei, deo, dri, dro]
        batch_train_data, sen_cut_batch = generate_batch_data(train_data, config, "train", -1, sen_cut_train)  # -1 means random sampling
        # dw, m_dw, qw, m_qw, dc, m_dc, qc, m_qc, cd, m_cd, a, dei, deo, dri, dro = batch_train_data

        print(len(sen_cut_batch))

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        dw, dc, qw, qc, cd, cd_m = extract_data(batch_train_data)
        cand_probs = coref_model(dw, dc, qw, qc, cd, cd_m, sen_cut_batch) # B x Cmax

        answer = torch.tensor(batch_train_data[10]).type(torch.LongTensor) # B x 1
        loss = criterion(cand_probs, answer)

        # evaluation process
        acc_batch = cal_acc(cand_probs, answer, batch_size)
        batch_acc_list.append(acc_batch)
        batch_loss_list.append(loss)
        dev_acc_list = evaluate_result(iter_index, config, dev_data, batch_acc_list, batch_loss_list, dev_acc_list, coref_model, sen_cut_dev)

        # save model
        if iter_index % config['model_save_frequency'] == 0 and len(sys.argv) > 4:
            torch.save(coref_model.state_dict(), torch_model_p)

        # back-prop
        loss.backward()
        optimizer.step()

        # check stopping criteria
        iter_index += 1
        if iter_index > max_iter: break
Example #18
def main():
	savepath = './save_point'
	filepath = './save_point/model_api_checkpoint.h5'
	train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
	train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
	test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
	test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

	train_data = extract_data(train_data_filename, 60000, dense=False)
	train_data = train_data.reshape((60000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
	train_labels = extract_labels(train_labels_filename, 60000, one_hot=True)
	test_data = extract_data(test_data_filename, 10000, dense=False)
	test_data = test_data.reshape((10000, NUM_CHANNELS, IMG_SIZE, IMG_SIZE))
	test_labels = extract_labels(test_labels_filename, 10000, one_hot=True)

	validation_data = train_data[:VALIDATION_SIZE, ...]
	validation_labels = train_labels[:VALIDATION_SIZE, :]
	validation_set = (validation_data, validation_labels)
	train_data = train_data[VALIDATION_SIZE:, ...]
	train_labels = train_labels[VALIDATION_SIZE:, ...]

	img = Input(shape=(1, 28, 28))
	conv1 = Convolution2D(32, 3, 3, border_mode='same')(img)
	conv1 = Activation('relu')(conv1)
	pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
	conv2_1 = Convolution2D(64, 3, 3, border_mode='same')(pool1)
	conv2_2 = Convolution2D(64, 5, 5, border_mode='same')(pool1)
	conv2_1 = Activation('relu')(conv2_1)
	conv2_2 = Activation('relu')(conv2_2)
	pool2_1 = MaxPooling2D(pool_size=(2, 2))(conv2_1)
	pool2_2 = MaxPooling2D(pool_size=(2, 2))(conv2_2)
	dense1 = Flatten()(pool2_1)
	dense2 = Flatten()(pool2_2)
	dense = merge([dense1, dense2], mode='concat', concat_axis=1)
	dense = Dense(512)(dense)
	dense = Activation('relu')(dense)
	dense = Dense(256)(dense)
	dense = Activation('relu')(dense)
	dense = Dense(10)(dense)
	output = Activation('softmax')(dense)

	model = Model(input=[img], output=[output])

	sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9)

	model.compile(
					optimizer=sgd,
					loss=['categorical_crossentropy'],
					metrics=["accuracy"])

	model.fit(
					[train_data],
					[train_labels],
					nb_epoch=1,
					verbose=1,
					batch_size=1000,
					validation_data=validation_set)

	print 'Save model weights'
	if not os.path.isdir (savepath):
		os.mkdir (savepath)
	model.save_weights(filepath, overwrite=True)


	predictions = model.predict([test_data],
	                            batch_size=1000)

	print 'Test error: %.1f%%' % error_rate(predictions, test_labels)

	print 'Test loss: %.14f, Test accuracy %.4f' % \
	      tuple(model.evaluate([test_data], [test_labels], batch_size=1000))
Example #19

if __name__ == '__main__':
    tick = time.time()
    args = get_parser(sys.argv[1:])
    #    args = get_parser(['--CITY', 'NYK', '--LOG_DIR', 'log',
    #                   '--WITH_TIME', '--normalize_weight'])
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device

    data, dicts = load_data(
        os.path.join(
            args.ROOT, 'data',
            '{}_INTV_processed_voc5_len2_setting_WITH_GPS_WITH_TIME_WITH_USERID.pk'
            .format(args.CITY)))
    args.vocabulary_size = dicts.vocabulary_size
    data, idx = extract_data(data, args)  #put all data_extraction here
    train_data = get_train_data(data)

    dataloader = DataLoader(train_data, args)
    dataloader_time = DataLoader_time(data, args, idx)
    evaluator_emb = Evaluator(args, dicts, mode='emb')
    evaluator_weight = Evaluator(args, dicts, mode='weight')
    logger = Logger(os.path.join(args.LOG_DIR, 'log_txt'))

    graph = tf.Graph()
    with graph.as_default():
        model = STSkipgram(args)
        sess = tf.Session(graph=graph, config=config)
    state = train(graph, sess, model, args, evaluator_emb, evaluator_weight,
                  logger, dataloader, dataloader_time)
    sess.close()
Example #20
    X = np.concatenate([X] +
                       [np.apply_along_axis(shift, 1, X, vector)
                        for vector in direction_vectors])
    print X.shape
    y = np.concatenate([y for _ in range(len(direction_vectors) + 1)], axis=0)
    print y.shape
    return X, y


# Extract data
train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')

X_train = extract_data(train_data_filename, 60000, dense=True)
y_train = extract_labels(train_labels_filename, 60000, one_hot=False)
X_test = extract_data(test_data_filename, 10000, dense=True)
y_test = extract_labels(test_labels_filename, 10000, one_hot=False)


#################################################
# Test for decision tree classifier without dimensionality reduction
Tree = DecisionTreeClassifier()
Tree.fit(X_train, y_train)
print 'Without dimensionality reduction: ', Tree.score(X_test, y_test)

# Dimensionality reduction using PCA (784 -> 64)
pca = PCA(n_components=64)
pca.fit(X_train)
X_train_reduce = pca.transform(X_train)
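
The excerpt stops right after the PCA transform; a plausible continuation, mirroring the no-reduction test above, would score a tree trained on the reduced features:

# Hypothetical continuation: repeat the decision tree test on the PCA features
X_test_reduce = pca.transform(X_test)
Tree_reduce = DecisionTreeClassifier()
Tree_reduce.fit(X_train_reduce, y_train)
print 'With dimensionality reduction: ', Tree_reduce.score(X_test_reduce, y_test)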
Example #21
    print(
        "We have ", aug_func_count, " augmentation function in our model"
        " with an augmentation factor of ", aug_factor)

    # Read our dataset
    dataLog_orig = utils.read_data_log(data_filename)
    utils.visualize_data(dataLog_orig)

    # Get rid of some noisy, low-throttle data
    dataLog = dataLog_orig.loc[dataLog_orig['throttle'] > 0.25]
    print("Loaded data info: ")
    dataLog.info()

    filenames, steering = utils.extract_data(dataLog,
                                             remove_zeros=False,
                                             round_steering=True)

    total_sample = len(filenames)
    train_files, val_files, train_steering, val_steering = train_test_split(
        filenames, steering, test_size=0.33, random_state=543)
    train_samples = len(train_files)
    val_samples = len(val_files)
    print("Total Sample: ", total_sample, " Training samples : ",
          train_samples, " Validation samples: ", val_samples)
    batch_size = args.batch
    epochs = args.epochs

    model = model()
    model.summary()
Example #22
def main(argv=None):  # pylint: disable=unused-argument
  # Get the data.
  train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
  train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
  test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
  test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')


  # Extract it into numpy arrays.
  train_data = extract_data(train_data_filename, 60000, dense=False)
  train_labels = extract_labels(train_labels_filename, 60000, one_hot=True)
  test_data = extract_data(test_data_filename, 10000, dense=False )
  test_labels = extract_labels(test_labels_filename, 10000, one_hot=True)


  # Generate a validation set.
  validation_data = train_data[:VALIDATION_SIZE, ...]
  validation_labels = train_labels[:VALIDATION_SIZE]
  train_data = train_data[VALIDATION_SIZE:, ...]
  train_labels = train_labels[VALIDATION_SIZE:]
  num_epochs = NUM_EPOCHS
  train_size = train_labels.shape[0]

  # This is where training samples and labels are fed to the graph.
  # These placeholder nodes will be fed a batch of training data at each
  # training step using the {feed_dict} argument to the Run() call below.
  train_data_node = tf.placeholder(
      tf.float32,
      shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
  train_labels_node = tf.placeholder(tf.float32,
                                     shape=(BATCH_SIZE, NUM_LABELS))
  eval_data = tf.placeholder(
      tf.float32,
      shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

  # The variables below hold all the trainable weights. They are passed an
  # initial value which will be assigned when we call:
  # {tf.initialize_all_variables().run()}

  # First convolutional layer
  conv1_weights = tf.Variable(
      tf.truncated_normal([3, 3, NUM_CHANNELS, 32],  # 3x3 filter, depth 32.
                          stddev=0.1,
                          seed=SEED))
  conv1_biases = tf.Variable(tf.zeros([32]))

  # Two parallel second-stage convolutional layers: a 5x5 filter and a 3x3 filter.
  conv2_weights = tf.Variable(
      tf.truncated_normal([5, 5, 32, 64],
                          stddev=0.1,
                          seed=SEED))
  conv2_biases = tf.Variable(tf.constant(0.01, shape=[64]))

  conv2_weights2 = tf.Variable(
      tf.truncated_normal([3, 3, 32, 64],
                          stddev=0.1,
                          seed=SEED))
  conv2_biases2 = tf.Variable(tf.constant(0.01, shape=[64]))

  # First fully connected layer after conv layer
  fc1_weights = tf.Variable(  # fully connected, depth 512.
      tf.truncated_normal(
          [IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 128, 512],
          stddev=0.05,
          seed=SEED))
  fc1_biases = tf.Variable(tf.constant(0.01, shape=[512]))

  # Second fully connected layer
  fc2_weights = tf.Variable(
      tf.truncated_normal([512, 256],
                          stddev=0.05,
                          seed=SEED))
  fc2_biases = tf.Variable(tf.constant(0.1, shape=[256]))

  # Output layer
  fc3_weights = tf.Variable(
      tf.truncated_normal([256, NUM_LABELS],
                          stddev=0.04,
                          seed=SEED))
  fc3_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))


  # We will replicate the model structure for the training subgraph, as well
  # as the evaluation subgraphs, while sharing the trainable parameters.
  def model(data, train=False):
    """The Model definition."""
    # 2D convolution, with 'SAME' padding (i.e. the output feature map has
    # the same size as the input). Note that {strides} is a 4D array whose
    # shape matches the data layout: [image index, y, x, depth].
    conv = tf.nn.conv2d(data,
                        conv1_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    # Bias and rectified linear non-linearity.
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
    if train:
        relu = tf.nn.dropout(relu, .5)
    # Max pooling. The kernel size spec {ksize} also follows the layout of
    # the data. Here we have a pooling window of 2, and a stride of 2.
    pool = tf.nn.max_pool(relu,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')
    conv = tf.nn.conv2d(pool,
                        conv2_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
    conv2 = tf.nn.conv2d(pool,
                         conv2_weights2,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases2))

    pool = tf.nn.max_pool(relu,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')
    pool2 = tf.nn.max_pool(relu2,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME')
    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool = tf.concat(3, [pool, pool2])
    pool_shape = pool.get_shape().as_list()
    reshape = tf.reshape(
        pool,
        [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
    # Fully connected layer. Note that the '+' operation automatically
    # broadcasts the biases.
    hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    hidden = tf.nn.relu(tf.matmul(hidden, fc2_weights) + fc2_biases)
    # Add a 50% dropout during training only. Dropout also scales
    # activations such that no rescaling is needed at evaluation time.
    if train:
      hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
    return tf.matmul(hidden, fc3_weights) + fc3_biases

  def extract_filter (data):
    conv = tf.nn.conv2d(data,
                        conv1_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    # Bias and rectified linear non-linearity.
    relu1 = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))

    # Max pooling. The kernel size spec {ksize} also follows the layout of
    # the data. Here we have a pooling window of 2, and a stride of 2.
    pool = tf.nn.max_pool(relu1,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')
    conv = tf.nn.conv2d(pool,
                        conv2_weights,
                        strides=[1, 1, 1, 1],
                        padding='SAME')
    relu2 = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
    conv2 = tf.nn.conv2d(pool,
                         conv2_weights2,
                         strides=[1, 1, 1, 1],
                         padding='SAME')
    relu3 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases2))

    return relu1, relu2, relu3


  # Training computation: logits + cross-entropy loss.
  logits = model(train_data_node, True)
  loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
      logits, train_labels_node))

  # L2 regularization for the fully connected parameters.
  regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                  tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases) +
                  tf.nn.l2_loss(fc3_weights) + tf.nn.l2_loss(fc3_biases))
  # Add the regularization term to the loss.
  loss += 5e-4 * regularizers

  # Optimizer: set up a variable that's incremented once per batch and
  # controls the learning rate decay.
  batch = tf.Variable(0)
  # Decay once per epoch, using an exponential schedule starting at 0.01.
  learning_rate = tf.train.exponential_decay(
      0.01,                # Base learning rate.
      batch * BATCH_SIZE,  # Current index into the dataset.
      train_size,          # Decay step.
      0.95,                # Decay rate.
      staircase=True)
  # Use simple momentum for the optimization.
  optimizer = tf.train.MomentumOptimizer(learning_rate,
                                         0.9).minimize(loss,
                                                       global_step=batch)

  # Predictions for the current training minibatch.
  train_prediction = tf.nn.softmax(logits)

  # Predictions for the test and validation, which we'll compute less often.
  eval_prediction = tf.nn.softmax(model(eval_data))

  # Small utility function to evaluate a dataset by feeding batches of data to
  # {eval_data} and pulling the results from {eval_predictions}.
  # Saves memory and enables this to run on smaller GPUs.
  def eval_in_batches(data, sess):
    """Get all predictions for a dataset by running it in small batches."""
    size = data.shape[0]
    if size < EVAL_BATCH_SIZE:
      raise ValueError("batch size for evals larger than dataset: %d" % size)
    predictions = numpy.ndarray(shape=(size, NUM_LABELS), dtype=numpy.float32)
    for begin in xrange(0, size, EVAL_BATCH_SIZE):
      end = begin + EVAL_BATCH_SIZE
      if end <= size:
        predictions[begin:end, :] = sess.run(
            eval_prediction,
            feed_dict={eval_data: data[begin:end, ...]})
      else:
        batch_predictions = sess.run(
            eval_prediction,
            feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})
        predictions[begin:, :] = batch_predictions[begin - size:, :]
    return predictions

  # Create a local session to run the training.
  saver = tf.train.Saver()
  start_time = time.time()
  with tf.Session() as sess:
    # Run all the initializers to prepare the trainable parameters.
    if FLAGS.model:
      saver.restore(sess, FLAGS.model)  # If model exists, load it
    else:
      sess.run(tf.initialize_all_variables())  # If there is no saved model, initialize randomly
    if FLAGS.train:
      # Loop through training steps.
      for step in xrange(int(num_epochs * train_size) // BATCH_SIZE):
        # Compute the offset of the current minibatch in the data.
        # Note that we could use better randomization across epochs.
        offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_data = train_data[offset:(offset + BATCH_SIZE), ...]
        batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
        # This dictionary maps the batch data (as a numpy array) to the
        # node in the graph it should be fed to.
        feed_dict = {train_data_node: batch_data,
                     train_labels_node: batch_labels}
        # Run the graph and fetch some of the nodes.
        _, l, lr, predictions = sess.run(
            [optimizer, loss, learning_rate, train_prediction],
            feed_dict=feed_dict)
        if step % EVAL_FREQUENCY == 0:
          elapsed_time = time.time() - start_time
          start_time = time.time()
          print('Step %d (epoch %.2f), %.1f ms' %
                (step, float(step) * BATCH_SIZE / train_size,
                 1000 * elapsed_time / EVAL_FREQUENCY))
          print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
          print('Minibatch error: %.1f%%' % error_rate(predictions, batch_labels))
          print('Validation error: %.1f%%' % error_rate(
              eval_in_batches(validation_data, sess), validation_labels))
          sys.stdout.flush()
      # Finally print the result!
      test_error = error_rate(eval_in_batches(test_data, sess), test_labels)
      print('Test error: %.1f%%' % test_error)
      print ('Optimization done')
      print ('Save models')
      if not tf.gfile.Exists("./conv_save"):
          tf.gfile.MakeDirs("./conv_save")
      saver_path = saver.save(sess, "./conv_save/model.ckpt")
      print ('Successfully saved file: %s' % saver_path)
    else:  # If train flag is false, execute image extraction routine
      print ("Filter extraction routine")
      aa = train_data[1:2, :, :, :]
      print (aa.shape)
      # Run extract filter operations (conv1, conv2 and conv3 layers)
      images = sess.run(extract_filter(train_data[1:2, :, :, :]))
      print (images[2].shape)
      plt.imshow (images[2][0, :, :, 32] * 255 + 255 / 2, cmap='gray')
      # plt.imshow (images[2][0, :, :, 32], cmap='gray')
      plt.show ()
      # Save all outputs
      for i in range (3):
        filter_shape = images[i].shape
        img_size = [filter_shape[1], filter_shape[2]]
        print (img_size)
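        # (Hypothetical continuation; the original excerpt is cut off here.)
        # One way to persist the first channel of each extracted feature map:
        plt.imsave('filter_layer_%d.png' % i, images[i][0, :, :, 0], cmap='gray')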
Example #23
def twv_variable_calculations(target, yolo_output, keyword_indices, calcs_for_atwv_map, config_dict, threshold):

    C = config_dict["C"]
    B = config_dict["B"]
    K = config_dict["K"]

    #pred conf: the confidence of every box (4X6X2)
    #pred_class_all_prob ([4, 6, 2, 1000])
    pred_ws, pred_start, pred_end, pred_conf, pred_class_all_prob = utils.extract_data(yolo_output, C, B, K)

    pred_classes_prob, pred_classes = torch.max(pred_class_all_prob, 3) #max on Classes (K)
    conf_class_mult, box_index = torch.max(( pred_conf* pred_classes_prob), 2) #max on p*K


    gt_thresh_idx = (torch.gt(conf_class_mult, threshold).long() * (pred_classes[:,:,0] +1)).nonzero()

    #for false negatives
    non_zero_indices = (target + 1).nonzero() #([[0, 0], [1, 1],...

    for batch, target_keyword in non_zero_indices:
        target_keyword = target_keyword.item()
        if target_keyword in keyword_indices:
            if  target_keyword not in calcs_for_atwv_map:
                calcs_for_atwv_map[target_keyword] = [0, 0, 0]

            n_true = calcs_for_atwv_map[target_keyword][2]

            n_true += 1  # true number of occurrences of the term in the corpus

            calcs_for_atwv_map[target_keyword][2] = n_true

    count_doubles = {} #count if a word occurred twice in a single example
    for batch_idx_pred, predict_keyword in gt_thresh_idx:
        pred_cell = predict_keyword.item()
        pred_word = pred_classes[batch_idx_pred.item(), pred_cell, 0].item()
        if pred_word in keyword_indices:
            if  batch_idx_pred.item() not in count_doubles:
                count_doubles[batch_idx_pred.item()] = []
            if pred_word in count_doubles[batch_idx_pred.item()]:
                #pdb.set_trace()
                continue #ignore words that appeared twice
            else:
                count_doubles[batch_idx_pred.item()].append(pred_word)

            if pred_word not in  calcs_for_atwv_map:
                calcs_for_atwv_map[pred_word] = [0, 0, 0]

            #pdb.set_trace()
            n_correct = calcs_for_atwv_map[pred_word][0]
            n_spurious = calcs_for_atwv_map[pred_word][1]

            #find if word really was there
            exists = 0
            for batch, target_keyword in non_zero_indices:
                if batch.item() ==  batch_idx_pred.item():
                    if target_keyword.item() == pred_word:
                        exists = 1
                        break
                if batch.item() > batch_idx_pred.item():
                    break

            n_correct += exists
            if exists == 0: n_spurious += 1

            calcs_for_atwv_map[pred_word][0] = n_correct
            calcs_for_atwv_map[pred_word][1] = n_spurious
Example #24
def convert_yolo_tags(pred, c, b, k, threshold):

    ''' 
    YOLO's outputs are tags given in format: (cell_i, box_j, (t, delta_t, p_b_{i,j}), p_{c_i}(k) ).
    This function converts it to tags in the following format: (start, end, word)

    inputs:
    pred: prediction or given target labels, in yolo format
    c: number of cells
    b: number of timing boxes
    k: number of keywords
    threshold: if the product p_b_{i,j} * p_{c_i}(k) is greater than the threshold, we predict that a keyword exists.

    output:
    final_pred_labels: dictionary, whose keys are the keywords. Every keyword has an array of (start, end) values.

    '''
    
    pred_ws, pred_start, pred_end, pred_conf, pred_class_prob = utils.extract_data(pred, c, b, k)
    class_max, class_indices = torch.max(pred_class_prob, 3)
    conf_max, box_indices = torch.max((pred_conf * class_max), 2)

    pass_conf = (conf_max >= threshold).float()
    labels = []
    for batch in range(0, pred.size(0)):
        for cell_i in range(0, pred.size(1)):
            if pass_conf[batch, cell_i].item() <= 0:
                continue
            selected_box_index = box_indices[batch, cell_i].item()
            selected_class_index = class_indices[batch, cell_i, 0].item()
            label_start = pred_start[batch, cell_i, selected_box_index].item()
            label_end = pred_end[batch, cell_i, selected_box_index].item()
            x = (label_end + label_start)/2
            w = pred_ws[batch, cell_i, selected_box_index].item()
            labels.append([cell_i, x, w, selected_class_index, batch])

    width_cell = 1. / c  # width per cell
    final_pred_labels = {}

    for label in labels:
        real_x = (label[0] * width_cell + label[1])  # label[1] was already multiplied by the cell width
        real_w = label[2]
        cur_start = (real_x - float(real_w) / 2.0)
        cur_end = (real_x + float(real_w) / 2.0)
        cur_class = str(label[4])+ "_" + str(label[3]) # batch_class

        if cur_class not in final_pred_labels:
            final_pred_labels[cur_class] = []

        else:
            prev_start = final_pred_labels[cur_class][-1][0]
            prev_end = final_pred_labels[cur_class][-1][1]
            if cur_start >= prev_end and cur_end >= prev_start:
                # --------
                #          -------
                if cur_end - prev_end <= GAP_THRESH:
                    final_pred_labels[cur_class].pop() #remove last item
                    cur_start = prev_start
            elif cur_start <= prev_end and prev_start <= cur_end:
                # --------
                #      -------
                final_pred_labels[cur_class].pop() #remove last item
                cur_start = prev_start
            elif cur_start >= prev_start and cur_end <= prev_end:
                # -----------
                #    ----
                final_pred_labels[cur_class].pop() #remove last item
                cur_start = prev_start
                cur_end = prev_end
            elif cur_start >= prev_start and cur_end >= prev_end:
                #     -----
                #   ---------
                final_pred_labels[cur_class].pop() #remove last item

        final_pred_labels[cur_class].append([cur_start, cur_end])
        # print "objet- start:{}, end:{}, class:{}".format(pred_start,pred_end, pred_class)

    return final_pred_labels
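A small worked check of the cell-to-time conversion above, using made-up numbers (c = 4 cells, a detection in cell 2 with in-cell offset x = 0.1 and width w = 0.05):

# Worked illustration with made-up numbers
width_cell = 1. / 4                                  # 0.25
real_x = 2 * width_cell + 0.1                        # 0.6 (x is already scaled by the cell width)
span = (real_x - 0.05 / 2.0, real_x + 0.05 / 2.0)    # (0.575, 0.625)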
Example #26
        return len(self.data)

    def __getitem__(self, idx):
        return (self.data[idx], self.label[idx])


if __name__ == "__main__":

    #Parameters for the dataset
    chunk_size = 200

    #Load in the input data
    dirs = extract_file_names(
        "/home/alex/Projects/Unsupervised/kepler_q9_variability/")

    data = extract_data(dirs)

    data = split_to_chunk(data, chunk_size)

    datalist = convert_datalist(data)

    datalist = normalise(datalist)

    data_arr = np.vstack(datalist)

    with open("autoencoder_dataset.pkl", "wb") as f:
        pickle.dump(data_arr, f)
        print("Written ae_dataset.pkl")

    ### Plotting
    #for i in range(0, 100):
Example #27
            train_labels,
            validation_data,
            validation_labels,
            epochs=8)
# model.save("model_saves/NN_model.ckpt")
# model.load("model_saves/NN_model_e7.ckpt")

######################################################
# Test set accuracy for the model
test_predictions = model.predict(test_data)
correct = np.sum(test_predictions == test_labels)
print("Accuracy:", correct / len(test_data))

######################################################
# Labeling the south part of the image
south_img = utils.extract_data("data/test_south.tif")
south_img_shape = np.shape(south_img)
south_data = south_img.reshape((np.shape(south_img)[0] *
                                np.shape(south_img)[1],
                                np.shape(south_img)[2]))
south_data = utils.standardize(south_data)
south_predictions = model.predict(south_data)
south_predictions_img = south_predictions.reshape(
    (south_img_shape[0], south_img_shape[1]))

## applying a denoising filter:
# south_predictions_img = utils.denoise(south_predictions_img)
south_predictions_img = utils.to_RGB(south_predictions_img)

plt.imshow(south_predictions_img)
plt.axis('off')
Example #28
def yolo_accuracy(prediction, target, C, B, K, T, iou_t=0.5, is_cuda=False):

    correct_class_high_iou = 0
    correct_class_low_iou = 0
    wrong_class_high_iou = 0
    wrong_class_low_iou = 0
    total_correct_class = 0
    pred_ws, pred_start, pred_end, pred_conf, pred_class_all_prob = utils.extract_data(prediction, C, B, K)

    pred_classes_prob, pred_classes = torch.max(pred_class_all_prob, 3)
    conf_class_mult, box_index = torch.max(( pred_conf* pred_classes_prob), 2)

    no_object_correct = torch.eq((conf_class_mult < T).float(), 1 - target[:, :, -1]).cpu().sum()
    no_object_object_wrong = (torch.eq((conf_class_mult < T).float(), target[:, :, -1])).cpu().sum()

    target_ws, target_start, target_end, target_conf, target_class_all_prob = utils.extract_data(target[:, :, :-1], C, B, K)
    target_classes_prob, target_classes = torch.max(target_class_all_prob, 3)

    squeeze_target_start = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \
        torch.zeros([target_start.size(0), C])
    squeeze_pred_start = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \
        torch.zeros([target_start.size(0), C])
    squeeze_target_end = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \
        torch.zeros([target_start.size(0), C])
    squeeze_pred_end = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \
        torch.zeros([target_start.size(0), C])
    squeeze_target_ws = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \
        torch.zeros([target_start.size(0), C])
    squeeze_pred_ws = torch.zeros([target_start.size(0), C]).cuda() if is_cuda else \
        torch.zeros([target_start.size(0), C])


    box_indices_array = box_index.cpu().numpy()
    for row in range(0, box_indices_array.shape[0]):
        for col in range(0, box_indices_array.shape[1]):
            squeeze_target_start[row, col] = target_start[row, col, box_indices_array[row, col]]
            squeeze_pred_start[row, col] = pred_start[row, col, box_indices_array[row, col]]
            squeeze_target_end[row, col] = target_end[row, col, box_indices_array[row, col]]
            squeeze_pred_end[row, col] = pred_end[row, col, box_indices_array[row, col]]
            squeeze_target_ws[row, col] = target_ws[row, col, box_indices_array[row, col]]
            squeeze_pred_ws[row, col] = pred_ws[row, col, box_indices_array[row, col]]


    intersect_start = torch.max(squeeze_pred_start, squeeze_target_start)
    intersect_end = torch.min(squeeze_pred_end, squeeze_target_end)
    intersect_w = intersect_end - intersect_start

    iou_mask = torch.eq(torch.eq((conf_class_mult > T).float(), target[:, :, -1]).float(), target[:, :, -1])
    iou = intersect_w / (squeeze_pred_ws + squeeze_target_ws - intersect_w)
    iou_select = iou * iou_mask.float()


    mean_iou_correct = 0.0
    mean_iou_wrong = 0.0
    is_object = target[:, :, -1].cpu().numpy()
    for batch in range(0, box_indices_array.shape[0]):
        for cell in range(0, box_indices_array.shape[1]):
            if is_object[batch, cell].item() != 1 or (conf_class_mult > T)[batch,cell].item() !=1:
                continue
            if pred_classes[batch, cell, 0].item() != target_classes[batch, cell, 0].item():  # predict object with wrong class
                if iou_select[batch, cell].item() < iou_t:
                    wrong_class_low_iou += 1
                else:
                    wrong_class_high_iou += 1
                    
                mean_iou_wrong += iou_select[batch, cell].item()
            else:  # predict object with right class
                if iou_select[batch, cell].item() < iou_t:
                    correct_class_low_iou += 1
                else:
                    correct_class_high_iou += 1
                mean_iou_correct += iou_select[batch, cell].item()

            total_correct_class += 1


    return no_object_correct - total_correct_class, no_object_object_wrong, correct_class_high_iou, \
           correct_class_low_iou, wrong_class_high_iou, wrong_class_low_iou, total_correct_class, \
            mean_iou_correct, mean_iou_wrong
Example #29
    def agent_train(self, ns, r, done=False):
        #convert next state and reward to tensors
        #next_state_v = torch.tensor([next_state],dtype=dtype)
        #reward_v = torch.tensor([reward],dtype=dtype)

        #save the values in the replay buffer
        self.buffer.push(self.state, self.act, r, ns, done)
        #set the state to the next state to advance agent
        self.state = ns

        #if there are enough samples in replay buffer, perform network updates
        if len(self.buffer) >= self.BUFFER_SIZE:
            #get a mini batch from the replay buffer
            sample = self.buffer.sample(self.BATCH_SIZE)
            #make the data nice
            compressed_states, compressed_actions, compressed_next_states, compressed_rewards = utils.extract_data(
                sample)

            #critic network training
            #yt=r(st,at)+γ⋅Q(st+1,μ(st+1))
            na_from_tactor_a = self.target_actor.get_action(
                compressed_next_states)
            na_from_tactor = na_from_tactor_a.mean(dim=1).unsqueeze(-1)
            v_from_tcritic = self.target_critic.get_state_value(
                compressed_next_states, na_from_tactor)

            #calculate yt=r(st,at)+γ⋅Q(st+1,μ(st+1))
            target_v = compressed_rewards.unsqueeze(
                1) + self.GAMMA * v_from_tcritic
            actual_v = self.online_critic.get_state_value(
                compressed_states, compressed_actions)
            loss = nn.MSELoss()
            output = loss(actual_v, target_v)
            self.optim.zero_grad()
            output.backward(retain_graph=True)
            self.optim.step()

            self.online_critic.value_func.zero_grad()

            for s, a in zip(compressed_states.split(1),
                            compressed_actions.split(1)):
                online_v = self.online_critic.get_state_value(s, a)
                grad_wrt_a = torch.autograd.grad(online_v, (s, a))

                action = self.online_actor.get_action(s)
                action.mean().backward(retain_graph=True)

                for param in self.online_actor.policy.parameters():
                    param.data += self.ALPHA * (
                        param.grad * grad_wrt_a[1].item()) / (self.BATCH_SIZE)

                self.online_actor.policy.zero_grad()
                self.online_critic.value_func.zero_grad()

            # soft update

            for param_o, param_t in zip(self.online_actor.policy.parameters(),
                                        self.target_actor.policy.parameters()):
                param_t.data = param_o.data * self.TAU + param_t.data * (
                    1 - self.TAU)

            for param_o, param_t in zip(
                    self.online_critic.value_func.parameters(),
                    self.target_critic.value_func.parameters()):
                param_t.data = param_o.data * self.TAU + param_t.data * (
                    1 - self.TAU)

            self.online_actor.policy.zero_grad()
            self.target_actor.policy.zero_grad()
            self.online_critic.value_func.zero_grad()
            self.target_critic.value_func.zero_grad()

            torch.save(self.target_actor.policy.state_dict(),
                       self.agent_name + 'target_actor_state_1.pt')
            torch.save(self.target_critic.value_func.state_dict(),
                       self.agent_name + 'target_critic_state_1.pt')
Example #30
 def render_GET(self, request):
     request.setHeader(b"content-type", b"application/json")
     request.responseHeaders.addRawHeader(b"content-type",
                                          b"application/json")
     return Response.response(Response(request, data=extract_data(request)))
Example #31
 def get_face_data(self, target):
   df_face, _ = utils.load_data_from_csv(dtype="face")
   df_face, y = utils.extract_data(df_face, target, type="face")
   # df_face = preprocess(df_face, dtype="face")
   return df_face, y
Example #32
df_liwc = pd.merge(df_liwc, df_output, left_on="userId", right_on="userid")
df_nrc = pd.merge(df_nrc, df_output, left_on="userId", right_on="userid")

# drop users with multiple faces, keeping only the first face
df_face.drop_duplicates(subset="userId", keep="first", inplace=True)
df_face = pd.merge(df_face,
                   df_output,
                   left_on="userId",
                   right_on="userid",
                   how="outer")
del df_face["userId"]
df_face.rename(columns={"userid": "userId"}, inplace=True)
# since there were missing faces, fill mean face in place of no-faces
df_face.fillna(df_face.mean(), inplace=True)

X_age_face_train, y_age_face_train = utils.extract_data(df_face, label="age")

# Min Max scale features
X_age_face_train = preprocessing.MinMaxScaleDataframe(X_age_face_train)
X_age_text_train, y_age_text_train = utils.extract_data(df_text, label="age")
X_age_text_train = preprocessing.MinMaxScaleDataframe(X_age_text_train)
"""Code"""
print("Pre-processing data...\n")
# remove pages with count less than threshold (Note: this removes few users as well)
threshold = 5

page_like_count = df_relation.groupby(['like_id']).size()
df_relation['likes_count'] = df_relation['like_id'].apply(
    lambda x: page_like_count.get(x))
df_relation_filtered = df_relation[df_relation['likes_count'] > threshold]
Example #33
 def get_text_data(self, target):
   df_text, _ = utils.load_data_from_csv(dtype="text")
   df_text, y = utils.extract_data(df_text, target, type="text")
   # df_text = preprocess(df_text, dtype="text")
   return df_text, y