Example #1
def train_and_val(config, model, callbacks, mixture_num, sub_model_name):
    """Train and validate model."""
    print('training %s %s model' % (model_name, sub_model_name))

    # Hold out 5% of the training set for validation.
    train_size = int(
        (num_mon_sites * num_mon_inst_train + num_unmon_sites_train) * 0.95)
    train_steps = train_size // batch_size
    val_size = int(
        (num_mon_sites * num_mon_inst_train + num_unmon_sites_train) * 0.05)
    val_steps = val_size // batch_size

    train_time_start = time.time()
    # Round the step counts up when the dataset size is not an exact
    # multiple of batch_size, so the final partial batch is not dropped.
    model.fit_generator(
        data_generator.generate(config, 'training_data', mixture_num),
        steps_per_epoch=(train_steps if train_size % batch_size == 0
                         else train_steps + 1),
        epochs=epochs,
        verbose=2,
        callbacks=callbacks,
        validation_data=data_generator.generate(
            config, 'validation_data', mixture_num),
        validation_steps=(val_steps if val_size % batch_size == 0
                          else val_steps + 1),
        shuffle=False)
    train_time_end = time.time()

    print('Total training time: %f' % (train_time_end - train_time_start))
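The steps-per-epoch expression above is just a round-up division; a minimal sketch of the same computation with math.ceil (my own equivalence check, not part of the original script):

import math

# ceil(size / batch) equals size // batch, plus one when a partial batch remains.
def steps_for(size, batch_size):
    return math.ceil(size / batch_size)

assert steps_for(100, 32) == 4  # 3 full batches + 1 partial batch
assert steps_for(96, 32) == 3   # exact multiple: no extra step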
Example #2
def load_data(y_name='Color'):
    """Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
    train_data = data_generator.generate(NUM_TRAIN_DATA)
    print("TRAIN DATA:")
    print(train_data)
    train = pd.DataFrame(train_data, columns=COLUMN_NAMES)
    train_x, train_y = train, train.pop(y_name)

    test_data = data_generator.generate(NUM_TEST_DATA)
    print("TEST DATA:")
    print(test_data)
    test = pd.DataFrame(test_data, columns=COLUMN_NAMES)
    test_x, test_y = test, test.pop(y_name)

    return (train_x, train_y), (test_x, test_y)
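The split above relies on pandas' DataFrame.pop, which removes the label column in place and returns it as a Series; a minimal standalone illustration with made-up values:

import pandas as pd

df = pd.DataFrame([[1, 'red'], [2, 'blue']], columns=['Size', 'Color'])
labels = df.pop('Color')  # removed from df, returned as a Series
print(list(df.columns))   # ['Size'] -- only feature columns remain
print(labels.tolist())    # ['red', 'blue']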
Example #3
def main():

    welcome()

    # data_location, simulation_table, dynamodb, simulation_id,
    # simulation_duration, and message_interval are module-level globals.
    schema = open_data(data_location)

    if validate_data(schema):
        logging.info("[*] schema is valid")
    else:
        print("[!] exiting...")
        exit()

    # simulation state
    if simulation_table is not None:
        dynamo.create_simulation_record(dynamodb, simulation_table,
                                        simulation_id, simulation_duration,
                                        message_interval)

    # run simulation
    for i in range(simulation_duration):
        data = data_generator.generate(schema)
        logging.info(data)

        if not write_data(json.dumps(data)):
            logging.warning("[!] message failed to write to IoT Core endpoint")
            exit()

        time.sleep(message_interval)

    print("[*] simulation completed")

    # update state
    if simulation_table is not None:
        dynamo.delete_simulation_record(dynamodb, simulation_table,
                                        simulation_id)
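The loop above publishes one generated message every message_interval seconds for simulation_duration iterations; a minimal self-contained sketch of the same pattern, with hypothetical stubs standing in for the generator and the IoT write:

import json
import time

def write_data(payload):
    # Hypothetical stand-in for the IoT Core publish call.
    print('publish:', payload)
    return True

simulation_duration, message_interval = 3, 0.01
for _ in range(simulation_duration):
    data = {'temperature': 21.5}  # stand-in for data_generator.generate(schema)
    if not write_data(json.dumps(data)):
        break
    time.sleep(message_interval)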
Example #4
def quick_train(train_stock):

    # Produce a training CSV for the given ticker, then train on it.
    generate(train_stock)

    training_stock = 'training_data/' + train_stock + '.csv'
    strategy = 'double-dqn'
    window_size = 10
    batch_size = 32
    ep_count = 10
    model_name = 'model_double-dqn_GOOG_50'
    pretrained = False
    debug = False

    coloredlogs.install(level="DEBUG")
    switch_k_backend_device()
    print(training_stock)

    main(training_stock, window_size, batch_size, ep_count,
         strategy=strategy, model_name=model_name,
         pretrained=pretrained, debug=debug)
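A hypothetical invocation, assuming generate writes training_data/GOOG.csv for the ticker passed in:

quick_train('GOOG')  # trains a double-DQN agent on training_data/GOOG.csv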
Example #5
def main():
    """
        Description: Main function
    """

    # Argument parsing
    args = parse_arguments()

    # Create the directory if it does not exist.
    try:
        os.makedirs(args.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Creating word list
    lang_dict = load_dict(args.language)

    # Create font (path) list
    fonts = load_fonts(args.language)

    # Create synthetic sentences (or words).
    strings = create_strings_from_dict(args.length, args.random, args.count,
                                       lang_dict)
    string_count = len(strings)

    imgLists = load_img(args.background_dir)

    for i, img in enumerate(imgLists):
        try:
            generate(i, img, random.sample(strings, random.randint(1, 40)),
                     fonts[random.randrange(0, len(fonts))], args.output_dir,
                     args.extension, args.width, args.text_color,
                     args.orientation, args.space_width, args.font_size)
        except Exception:
            # Skip images that fail to render (e.g. when random.sample asks
            # for more strings than exist) rather than aborting the whole run.
            continue
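On Python 3.2+, the try/except around os.makedirs at the top of main can be replaced with exist_ok=True; a minimal sketch:

import os

os.makedirs('output', exist_ok=True)  # no error if 'output' already exists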
Example #6
def predict(config, model, mixture_num, sub_model_name):
    """Compute and save final predictions on test set."""
    print('generating predictions for %s %s model' %
          (model_name, sub_model_name))

    if model_name == 'var-cnn':
        # Restore the weights (presumably saved by a checkpoint callback
        # during training).
        model.load_weights('model_weights.h5')

    test_size = num_mon_sites * num_mon_inst_test + num_unmon_sites_test
    test_steps = test_size // batch_size

    test_time_start = time.time()
    predictions = model.predict_generator(
        data_generator.generate(config, 'test_data', mixture_num),
        steps=test_steps if test_size % batch_size == 0 else test_steps + 1,
        verbose=0)
    test_time_end = time.time()

    if not os.path.exists(predictions_dir):
        os.makedirs(predictions_dir)
    np.save(file='%s%s_model' % (predictions_dir, sub_model_name),
            arr=predictions)

    print('Total test time: %f' % (test_time_end - test_time_start))
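np.save appends a .npy suffix automatically, so each predictions file saved above can be reloaded with np.load; a minimal standalone illustration:

import numpy as np

arr = np.random.rand(4, 2)
np.save('demo_model', arr)             # written as 'demo_model.npy'
restored = np.load('demo_model.npy')
assert (arr == restored).all()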
Example #7
# default is 1.0. Accuracy becomes lower with larger sigma
sigma = 1.0

print('number of classes:', num_class, 'sigma for data scatter:', sigma)
if num_class == 4:
    n_train = 400
    n_test = 100
    feat_dim = 2
else:  # otherwise num_class == 3
    n_train = 300
    n_test = 60
    feat_dim = 2

# generate train dataset
print('generating training data')
x_train, y_train = dg.generate(number=n_train, seed=None, plot=True, num_class=num_class, sigma=sigma)

# generate test dataset
print('generating test data')
x_test, y_test = dg.generate(number=n_test, seed=None, plot=False, num_class=num_class, sigma=sigma)

# set classifiers to 'svm' to test SVM classifier
# set classifiers to 'softmax' to test softmax classifier
# set classifiers to 'knn' to test kNN classifier
classifiers = 'svm'

if classifiers == 'svm':
    print('training SVM classifier...')
    # feat_dim * num_class weights plus num_class biases (feat_dim == 2).
    w0 = np.random.normal(0, 1, (2 * num_class + num_class))
    result = minimize(svm_loss, w0, args=(x_train, y_train, num_class, n_train, feat_dim))
    print('testing SVM classifier...')
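scipy.optimize.minimize returns an OptimizeResult whose .x attribute holds the fitted parameters; a minimal self-contained sketch of the same call pattern on a toy quadratic loss (not the svm_loss from the example):

import numpy as np
from scipy.optimize import minimize

def toy_loss(w, target):
    return np.sum((w - target) ** 2)

result = minimize(toy_loss, np.zeros(3), args=(np.array([1.0, 2.0, 3.0]),))
print(result.x)  # approximately [1. 2. 3.]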
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_candidate', type=int)
    parser.add_argument('--num_course', type=int)
    # type=bool would treat any non-empty string (even 'False') as True,
    # so use a flag instead.
    parser.add_argument('--if_figure', action='store_true')
    parser.add_argument('--save_to', type=str, default='result/trail')
    parser.add_argument('--num_simulations', type=int, default=100)
    args = parser.parse_args()
    if not args.if_figure:
        data = data_generator.generate(args.num_candidate, args.num_course)
        output_file_name = args.save_to.split('.')[0]
        matching_sm = run_stable_marriage(data)
        score, course_satisfaction, candidate_satisfaction = evaluate_matching(
            data, matching_sm)
        write_to_file(data, matching_sm, output_file_name + '_sm.csv', score,
                      course_satisfaction, candidate_satisfaction)
        matching_hg = hungarian(data)
        score, course_satisfaction, candidate_satisfaction = evaluate_matching(
            data, matching_hg)
        write_to_file(data, matching_hg, output_file_name + '_hg.csv', score,
                      course_satisfaction, candidate_satisfaction)
        matching_mm = maximum_matching(data)
        score, course_satisfaction, candidate_satisfaction = evaluate_matching(
            data, matching_mm)
        write_to_file(data, matching_mm, output_file_name + '_mm.csv', score,
                      course_satisfaction, candidate_satisfaction)
    else:
        n = args.num_simulations
        while n < 100:
            n = int(input('Try a number >= 100: ') or '100')
        # Build the output directory name only after n is final.
        out_dir = (str(args.num_candidate) + 'candidates_' +
                   str(args.num_course) + 'courses_' + str(n) + 'simulations')
        if not Path('./figures/' + out_dir).exists():
            Path('./figures/' + out_dir).mkdir(parents=True)
        score = np.zeros([n, 3])
        prof_rate = np.zeros([n, 3])
        can_rate = np.zeros([n, 3])
        for i in progressbar.progressbar(range(n)):
            data = data_generator.generate(args.num_candidate, args.num_course)
            sm = run_stable_marriage(data)
            hg = hungarian(data)
            mm = maximum_matching(data)
            score[i, 0], prof_rate[i, 0], can_rate[i, 0] = \
                evaluate_matching(data, sm)
            score[i, 1], prof_rate[i, 1], can_rate[i, 1] = \
                evaluate_matching(data, hg)
            score[i, 2], prof_rate[i, 2], can_rate[i, 2] = \
                evaluate_matching(data, mm)
        # One figure per metric; each overlays the three methods' histograms
        # with dashed lines at their means.
        metrics = [(score, 'Score', 'scores.png'),
                   (prof_rate, "Professors' satisfaction rate", 'prof_rate.png'),
                   (can_rate, "Candidates' satisfaction rate", 'can_rate.png')]
        methods = [('Stable Marriage', 'c'), ('Hungarian', 'limegreen'),
                   ('Maximum Matching', 'orange')]
        for fig_num, (values, title, filename) in enumerate(metrics, start=1):
            plt.figure(fig_num)
            for col, (name, color) in enumerate(methods):
                mean = sum(values[:, col]) / n
                plt.hist(values[:, col], bins=10, label=name, alpha=0.6,
                         color=color)
                plt.axvline(mean, linestyle='--', color=color,
                            label='Mean of {0}={1:.2f}'.format(name, mean))
            plt.title('{0}, Monte Carlo n = {1}'.format(title, n))
            plt.legend()
            plt.savefig(Path('./figures/' + out_dir + '/' + filename))
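The dashed-line position sum(values[:, col]) / n is just the column mean; NumPy computes it directly (a quick standalone check):

import numpy as np

score = np.array([[1.0, 2.0], [3.0, 4.0]])
n = score.shape[0]
assert sum(score[:, 0]) / n == score[:, 0].mean() == 2.0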
Example #9
def execute_program():
    def status(word):
        word = str(word)
        if word.upper() != 'OK':
            beautiful_output.red_normal('--> Status: ' + '[' +
                                        "Error because " + str(word) + ']')

            input('Enter to quit...')
            os._exit(0)

        beautiful_output.green_normal('--> Status: ' + '[' + str(word) + ']')

    # Generate the code

    print('Begin to reconstruct...', end='      ')
    try:
        DB.reconstruct()

        # Don't use the del command because it makes the CLI output ugly;
        # 'rd' and 'md' are Windows shell commands (remove / make directory).
        os.system('rd /s/q train_data')
        os.system('md train_data')
        status('OK')

    except Exception as e:
        status(e)

    print('\nExecute generating progress...')
    try:
        num = int(input('The number of codes you want to generate: '))

        print('Generating...')
        generator.generate(num)
    except Exception as e:
        num = 0
        status(e)

    # print sample database

    print('\n')
    beautiful_output.underline('DATABASE CHECK:')
    DB.disp_DB()

    print('\nInitialize database...', end='      ')
    status('OK')

    # Clean the DB

    print('Remake the database, it may take a while...')

    try:
        data = data_generator.generate()
    except Exception as e:
        data = []
        status(e)

    # print out the clean data

    beautiful_output.underline('\nData check:')
    try:
        print(list(data.values())[0][0])
        print('\nData Check...', end='      ')

        status('OK')
    except Exception as e:
        status(e)

    # print log

    beautiful_output.underline('\nLog:')
    print('-------------------------------')
    print('train data number:  ' + str(num * 4))
    print('train data Pairs :  ' + str(len(list(data.keys()))))
    print('covered data rate:  ' + str(len(list(data.keys())) / 26 * 100)[:4] +
          '%')
    print('data shape       :  ' + str(list(data.values())[0][0].shape))
    print('-------------------------------')

    # data constructor for training

    def construct_data():

        label_total = list(data.keys())

        feature = []
        label = []

        for i in label_total:

            for ii in data[i]:
                # Flatten each 2-D feature array into a 1-D vector.
                ii = ii.reshape(1, ii.shape[1] * ii.shape[0])[0]
                feature.append(ii)
                label.append(i)

        return [feature, label]

    # train the data

    print('\nReconstruct the feature and label array...')
    try:

        # construct the knn model

        temp = construct_data()
        feature = temp[0]
        label = temp[1]

        beautiful_output.underline('\nCheck the feature:')
        print(feature[0][:10])
        print('\nReconstruct data...', end='      ')
        status('OK')
    except Exception as e:
        label = []
        feature = []
        status(e)

    print('\nTraining...', end='      ')
    try:

        # Define the classifier; kNN is presumably an alias for sklearn's
        # KNeighborsClassifier (the import is not shown in this excerpt).
        neighbor_num = len(np.unique(label))
        mode = kNN(n_neighbors=neighbor_num, algorithm='auto')
        mode.fit(feature, label)
        status('OK')
    except Exception as e:
        neighbor_num = 0
        mode = None
        status(e)

    # save model

    print('\nSave the model...', end='     ')
    try:
        joblib.dump(mode, './model.m')
        status('OK')
    except Exception as e:
        status(e)

    # validate accuracy

    print('\nValidate model accuracy')
    print('processing...')

    try:
        print('\nReconstruct...')
        DB.reconstruct()
        os.system('rd /s/q train_data')
        os.system('md train_data')

        print('\nGenerating test data...')
        generator.generate(int(num / 4))

        print('Clean the data')
        data = data_generator.generate()

        print('Reconstruct the data', end='      ')
        temp = construct_data()
        feature = temp[0]
        label = temp[1]
        predict_label = mode.predict(feature)

        compare = sum(list(map(lambda x, y: x == y, predict_label, label)))

        accuracy = str(compare / len(label) * 100)[:4]

        status('OK')

    except Exception as e:
        predict_label = []
        label = []
        accuracy = None
        status(e)

    beautiful_output.underline('\nModel accuracy: ')
    print('---------------------')
    print('Predict: ' + str(predict_label[:10]) + '...')
    print('Actual:  ' + str(label[:10]) + '...')
    print('---------------------')
    print(accuracy + '%')

    # print final summary
    beautiful_output.underline('\nSummary:')
    print('---------------------')
    print('Train data:     ' + str(len(predict_label)))
    print('Test data:      ' + str(int(len(predict_label) * 0.2)))
    print('Neighbor:       ' + str(neighbor_num) + '/26')
    print('Model Accuracy: ' + accuracy + '%')
    print('Model Address:  ' + './model.m')
    print('Train method:   ' + 'Knn')
    print('---------------------')
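Assuming kNN is sklearn's KNeighborsClassifier (the example's imports are not shown), the fit / dump / reload flow above reduces to this minimal standalone sketch:

import joblib
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

feature = np.random.rand(20, 4)
label = np.random.randint(0, 2, 20)

model = KNeighborsClassifier(n_neighbors=2, algorithm='auto')
model.fit(feature, label)
joblib.dump(model, './model.m')

restored = joblib.load('./model.m')
print((restored.predict(feature) == label).mean())  # training accuracy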
Example #10
import os
import pickle

import torch

# Import utility scripts
from MeLU import MeLU
from config import config
from train import training
from data_generator import generate
from evidence_candidate import selection

if __name__ == "__main__":
    master_path = "./ml"
    if not os.path.exists("{}/".format(master_path)):
        os.mkdir("{}/".format(master_path))
        # Preparing the dataset. It needs about 22GB of your hard disk space.
        generate(master_path)

    # Training the model
    melu = MeLU(config)
    model_filename = "{}/models.pkl".format(master_path)
    if not os.path.exists(model_filename):
        # Load the training dataset
        training_set_size = int(
            len(os.listdir("{}/warm_state".format(master_path))) / 4)
        # The support set is for local update
        supp_xs_s = []
        supp_ys_s = []
        # The query set is for global update
        query_xs_s = []
        query_ys_s = []
Example #11
    # Method excerpt from a class; dataGenerator is presumably an imported
    # helper module.
    def dataGen(self):
        '''
        Generates CSV data from StockTwits.
        '''
        dataGenerator.generate(self.ticker)
Example #12
import math
from PIL import Image, ImageDraw

import data_generator

points = data_generator.generate(250)

img = Image.new("RGB", (12 * 100, 12 * 100), "white")
draw = ImageDraw.Draw(img)
dotSize = 5

for point in points:
    # Map coordinates from roughly [-6, 6] into pixel space (100 px per unit)
    # and draw a 5x5 dot colored by class label.
    x = math.trunc((point[0] + 6) * 100)
    y = math.trunc((point[1] + 6) * 100)
    draw.rectangle([x, y, x + dotSize - 1, y + dotSize - 1],
                   fill="orange" if point[2] == 0 else "blue")

img.show()  # View in default viewer
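img.show() hands the rendering off to an external viewer; to keep a file instead, PIL images can be written with save (a one-line addition):

img.save('points.png')  # persist the plot alongside (or instead of) viewing it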
Example #13
    # Tail of (presumably) insert_table_data(line, cursor); table, columns,
    # arguments, data, with_pyparams, and with_numparams are derived from
    # `line` earlier in the function (not shown in this excerpt).
    if with_pyparams:
        parameters = ", ".join(["%s"] * len(columns))
    if with_numparams:
        parameters = ", ".join(":" + str(idx + 1)
                               for idx in range(len(columns)))
    sql = "INSERT INTO %s (%s) VALUES (%s)" \
          % (table, arguments, parameters)
    cursor.executemany(sql, data)


with app:

    connection = connect()
    cursor = connection.cursor()

    content = yaml.safe_load(open(REGRESS_DATA))  # PyYAML >= 6 requires a Loader
    assert isinstance(content, list)

    for sql in prelude:
        cursor.execute(sql)

    for line in content:
        insert_table_data(line, cursor)

    generated_content = data_generator.generate(content)
    for line in generated_content:
        insert_table_data(line, cursor)

    connection.commit()
    connection.release()
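The two branches above build either pyformat (%s) or numeric (:1, :2, ...) placeholders to match the driver's DB-API paramstyle; a minimal standalone sketch of the same executemany pattern using sqlite3's qmark style (an assumption for illustration; the original's driver is not shown):

import sqlite3

conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE t (a INTEGER, b TEXT)')

columns = ['a', 'b']
parameters = ', '.join(['?'] * len(columns))  # sqlite3 uses qmark placeholders
sql = 'INSERT INTO t (%s) VALUES (%s)' % (', '.join(columns), parameters)
cursor.executemany(sql, [(1, 'x'), (2, 'y')])
conn.commit()
print(cursor.execute('SELECT COUNT(*) FROM t').fetchone()[0])  # 2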