Example #1
File: dmn.py Project: 9rince/NLP
    def test_model(self):
        pred = self.build()
        cost = tf.nn.softmax_cross_entropy_with_logits(labels=self.snt_wt, logits=pred)
        saver = tf.train.Saver()
        my_data = prep_data(mode='testing')
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            saver.restore(sess, "./params/model.ckpt")
            print('[+] restored parameters from disk.')
            self.test_loss = []
            for i in tqdm(range(100)):
                # Fetch passage vectors, passage sentence weights, and question vectors
                p2v, p_wt, q2v = my_data.get_vectors()
                self.test_loss.append(sess.run(cost, feed_dict={self.Passage: p2v,
                                                                self.Question: q2v,
                                                                self.snt_wt: p_wt}))
            plt.plot(self.test_loss,'r--')
            plt.show()


    # def train_model_eager():
    #     tf.enable_eager_execution()
    #     print('executing eagerly? {}'.format(tf.executing_eagerly()))
Example #2
def split_scale():
    train, test = train_test_split(prep_data(),
                                   train_size=.8,
                                   random_state=123)
    scaler = MinMaxScaler(copy=True, feature_range=(0, 1)).fit(train)

    train_scaled = pd.DataFrame(scaler.transform(train),
                                columns=train.columns.values).set_index(
                                    [train.index.values])

    test_scaled = pd.DataFrame(scaler.transform(test),
                               columns=test.columns.values).set_index(
                                   [test.index.values])

    return train_scaled, test_scaled, scaler
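Returning the fitted scaler alongside the scaled frames lets later code map values back to original units. A minimal usage sketch (assuming prep_data yields an all-numeric DataFrame):

train_scaled, test_scaled, scaler = split_scale()
# Undo the 0-1 scaling, e.g. to report predictions in the original units
test_original = scaler.inverse_transform(test_scaled)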
Example #3
File: dmn.py Project: 9rince/NLP
    def train_model(self):
        print('[+] started training')
        pred = self.build_1()
        print(len(pred))
        print(self.snt_wt.shape)
        print('[+] model is built')
        cost = tf.nn.softmax_cross_entropy_with_logits(labels=self.snt_wt, logits=pred)  # tf.nn.softmax(self.snt_wt) - pred
        print('[+] cost')
        init_op = tf.global_variables_initializer()
        print('[+] init_op')
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(cost)
        print('[+] optimizer')
        saver = tf.train.Saver()
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            print('[+] session opened')
            # sess.run(init_op)
            saver.restore(sess, "./params/model.ckpt")
            print('[+] params loaded from disk')
            my_data = prep_data(mode='training')
            self.MAX_P_WORDS = 50
            self.MAX_Q_WORDS = 30
            # q_list = list(self.df_qas['q_no'])
            self.train_loss = []
            for epoch in range(self.no_epoch):
                print('epoch {}'.format(epoch))
                avg_loss = 0.
                for step in tqdm(range(1000)):
                    p2v, p_wt, q2v = my_data.get_vectors()
                    _, loss_i = sess.run([optimizer, cost],
                                         feed_dict={self.Passage: p2v,
                                                    self.Question: q2v,
                                                    self.snt_wt: p_wt})
                    avg_loss += loss_i
                self.train_loss.append(avg_loss / 1000.)
            save_path = saver.save(sess, "./params/model.ckpt")
            print('[+] saved parameters to disk.')

        plt.plot(self.train_loss, 'r--')
        plt.show()
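One caveat when reusing this snippet: sess.run(init_op) is commented out, so the session depends entirely on saver.restore and will fail unless ./params/model.ckpt already exists. A minimal guard for a first run (a hypothetical addition, not in the original project):

import os
if os.path.exists('./params/model.ckpt.index'):
    saver.restore(sess, './params/model.ckpt')  # resume from the checkpoint
else:
    sess.run(init_op)  # first run: initialize variables from scratch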
Example #4
import prep as p
import score as s
import match as m
import analyze_evaluate as ae
track = "python"

print("before __name__ guard")
if __name__ == '__main__':
    print("importing data")
    data, email_ids, fdf, idx_dict = p.prep_data(track=track)
    print("scoring participants")
    df_matrix = s.scoring_alg(fdf, data, idx_dict, track=track)
    print("matching participants")
    df_matched = m.pair_participants(data, df_matrix, email_ids, idx_dict)
    print("analyze matches")
    ae.evaluate_matches(df_matched)
    print("saving matches to db or .csv")
    #m.save_matches(df_matched, track=track)
    print("create and save .csv for email sending")
    m.csv_announcement_email(df_matched)

print("after __name__ guard")

Example #5
def get_params():
    try:
        name, epochs, batches = sys.argv[1:4]
    except ValueError:
        print('Usage: %s model_name epochs batch_size %s' % (script, xa))
        exit(1)
    try:
        plot = sys.argv[4]
    except IndexError:
        plot = False

    return name, int(epochs), int(batches), plot


if __name__ == '__main__':
    X, Y = get_data()
    train_x, train_y, test_x, test_y = prep_data(X, Y)
    # Getting our command line parameters
    name, epochs, batches, plot = get_params()
    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs,
                                           batches, test_x, test_y)
    # Save models and the training history for later use
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')
    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)
    # Test our model on both data that has been seen
    # (training data set) and unseen (test data set)
    print('Scores for %s' % title)
    # Notice that we need to specify batch_size in evaluate when we're
    # using LSTM.
    train_score = model.evaluate(train_x, train_y, batch_size=batches)
Example #6
from acquire import get_data
from prep import prep_data

# Get the raw data from .csv or MySQL query
raw = get_data()

# Remove nulls
df = prep_data(raw)

# Milestones before Friday:
# 2. Scale
# 3. Super basic Model

df.info()
df.describe()

# First pass for outlier detection:
# Do the value counts and distribution make sense?
# Is there anything way out of line here?
df.bedrooms.value_counts()  # encode as discrete
df.bathrooms.value_counts()  # encode as discrete
df.sqft.value_counts()  # can bin or scale
df.taxvalue.value_counts()  # scale this (also our target variable)
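The comments above outline a manual outlier review; a minimal IQR-based filter one could apply afterwards (the helper itself is hypothetical, only the column names come from this snippet):

def drop_iqr_outliers(df, col, k=1.5):
    # Keep rows within k * IQR of the 25th-75th percentile band
    q1, q3 = df[col].quantile([.25, .75])
    iqr = q3 - q1
    return df[(df[col] >= q1 - k * iqr) & (df[col] <= q3 + k * iqr)]

df = drop_iqr_outliers(df, 'sqft')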
Example #7
TAR_PATH = args['tar_path']  # assumed: parsed like the other args, but cut from this excerpt
TEST_TAR_PATH = args['test_tar_path']
DATASET_PATH = args['dataset']
TEST_PATH = args['test']
INPUT_LENGTH = args['x_length']
OUTPUT_LENGTH = args['y_length']
HIDDEN_SIZE = args['hidden_size']
NUM_LAYERS = args['num_layers']
MODEL_WEIGHTS = args['model_weights']

if TAR_PATH is not None:
    untar(TAR_PATH)

if TEST_TAR_PATH is not None:
    untar(TEST_TAR_PATH)

X, Y, input_vocab, output_vocab, input_dict, output_dict = prep_data(
    DATASET_PATH, INPUT_LENGTH, OUTPUT_LENGTH)

X_test, Y_test, test_input_vocab, test_output_vocab, _, _ = prep_data(
    TEST_PATH, INPUT_LENGTH, OUTPUT_LENGTH)

auto_encoder = get_model(len(input_vocab), INPUT_LENGTH, len(output_vocab),
                         OUTPUT_LENGTH, HIDDEN_SIZE, NUM_LAYERS)
auto_encoder.load_weights(MODEL_WEIGHTS)

auto_encoder.summary()

expected_codes = vec_to_words(Y_test, test_output_vocab)
tasks = input_vec_to_words(X_test, test_input_vocab)
codes = predict_codes(auto_encoder, X_test, input_dict, INPUT_LENGTH,
                      output_vocab)
Example #8
def split_data():
    train, test = train_test_split(prep_data(),
                                   train_size=.8,
                                   random_state=123)
    return train, test
Example #9
import argparse

from keras.utils import plot_model

from prep import prep_data

from model import get_model

ap = argparse.ArgumentParser()
ap.add_argument('--dataset', type=str, default='./data')
ap.add_argument('--x_length', type=int, default=15)
ap.add_argument('--y_length', type=int, default=20)
ap.add_argument('--hidden_size', type=int, default=128)
ap.add_argument('--num_layers', type=int, default=2)
ap.add_argument('--show_shapes', type=bool, default=False)  # caveat: argparse's type=bool treats any non-empty string as True

args = vars(ap.parse_args())

DATASET_PATH = args['dataset']
INPUT_LENGTH = args['x_length']
OUTPUT_LENGTH = args['y_length']
HIDDEN_SIZE = args['hidden_size']
NUM_LAYERS = args['num_layers']
SHOW_SHAPES = args['show_shapes']

_, _, input_vocab, output_vocab, _, _ = prep_data(DATASET_PATH, INPUT_LENGTH,
                                                  OUTPUT_LENGTH)

auto_encoder = get_model(len(input_vocab), INPUT_LENGTH, len(output_vocab),
                         OUTPUT_LENGTH, HIDDEN_SIZE, NUM_LAYERS)

plot_model(auto_encoder, to_file='model.png', show_shapes=SHOW_SHAPES)
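A practical note on the last line: keras.utils.plot_model needs the pydot package plus a Graphviz installation on the PATH, and raises an ImportError when either is missing. Because of the --show_shapes caveat noted above, passing --show_shapes False on the command line still enables shape annotations.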