Example #1
    def __init__(self):

        load_data.main()

        # connect to the resort database (credentials redacted)
        connection_string = "host='localhost' dbname='resort' user='******' password='******'"
        self.conn = psycopg2.connect(connection_string, cursor_factory=psycopg2.extras.DictCursor)


        # default location: not set yet
        self.loc_x = -1
        self.loc_y = -1
def load(database):
    ld = input("Do you want to load the existing CSV into the %s database? " % database)
    load_csv = ld.upper()
    if load_csv == 'YES' or load_csv == 'Y':
        load_data.main()
        print("CSV loaded successfully")
        log.info("CSV loaded successfully")
    else:
        print("Not loading CSV into the database")
        log.info("Not loading CSV into the database")
Example #3
def main():
    #parse arguments
    args = parse_args.parse_with_resolved_paths()
    args = dir_utils.resolve_run_directory(args)

    # create/load data
    data = load_data.main(args)

    #train model/load model
    model = train.main(data, args)

    #evaluate model
    evaluate.main(data, model, args)
Example #4
def realnfake(image, image_size, epochs):
    dataloader = load_data.main(image_size)
    real = next(iter(dataloader))

    plt.figure(figsize=(64, 64))
    plt.subplot(1, 2, 1)
    plt.axis("off")
    plt.title("Real Images", fontsize=100)
    plt.imshow(np.transpose(vutils.make_grid(real[0].to(device)[
               :64], padding=5, normalize=True).cpu(), (1, 2, 0)))

    img = vutils.make_grid(image[0:64], padding=2, normalize=True)
    img = img.detach().numpy()
    plt.subplot(1, 2, 2)
    plt.title("Fake Images", fontsize=100)
    plt.axis("off")
    plt.imshow(np.transpose(img[:64], (1, 2, 0)))

    plt.savefig('RealAndFake' +
                str(image_size) + "_" + str(epochs) + '.png')
    plt.close()
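A hedged usage sketch for the function above; the generator network, latent size, noise batch, and the image-size/epoch values passed in are illustrative assumptions not taken from this example.

import torch

# Hypothetical call: sample a batch of fake images from a trained DCGAN-style
# generator (netG and nz are assumed names) and plot them against a real batch.
noise = torch.randn(64, nz, 1, 1, device=device)
with torch.no_grad():
    fake = netG(noise).cpu()
realnfake(fake, 64, 25)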
import load_data
import generative_models as gm
import generative_alg as ga
import classify_models as cm
import classify_alg as ca
import adverse_alg as aa
import augment

import sys
import csv
import glob
import numpy as np
import os.path

if __name__ == "__main__":
    train, dev, test, wi, glove, prem_len, hypo_len = load_data.main()

    method = sys.argv[1]
    c_hidden_size = 150
    a_hidden_size = 150
    g_hidden_size = int(sys.argv[3])
    beam_size = 1
    version = int(sys.argv[2])
    batch_size = 64
    gen_epochs = 20
    latent_size = int(sys.argv[4])
    div_per_premise = 64
    div_samples = 32
    augment_file_size = 2**15
    aug_threshold = 0.9
    thresholds = [0.0, 0.3, 0.6, aug_threshold]
Example #6
        self.epochs = epochs


def main(data, args):
    trainer = Trainer(args)  # gin configured

    #FIXME: combine into one line once stuff works
    mode_module = importlib.import_module(args.mode)
    model = mode_module.build_model(args)

    model.compile(optimizer=trainer.optimizer,
                  loss=trainer.loss,
                  metrics=trainer.metrics)

    tr_history = model.fit_generator(
        generator=data.generator('train'),
        verbose=2,  # one line per epoch
        batch_size=trainer.batch_size,
        epochs=trainer.epochs,  # number of passes over the training data
        validation_split=0.1,  # fraction of data used for val
        shuffle=True)

    return model


if __name__ == "__main__":
    args = parse_args.parse_with_resolved_paths()
    gin.parse_config_file(args.configpath)
    data = load_data.main(args)
    main(data, args)
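Keras's fit_generator does not accept batch_size or validation_split, so the fit call above would fail as written (hence the FIXME). A hedged sketch of an equivalent call, assuming the data object can report dataset sizes and provide a 'val' split generator; those attributes are assumptions, not part of the original code.

# Sketch only: steps_per_epoch/validation_data are the generator-based
# counterparts of batch_size/validation_split.
tr_history = model.fit_generator(
    generator=data.generator('train'),
    steps_per_epoch=data.train_size // trainer.batch_size,  # batches per epoch
    epochs=trainer.epochs,
    validation_data=data.generator('val'),
    validation_steps=data.val_size // trainer.batch_size,
    verbose=2,  # one line per epoch
    shuffle=True)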
Example #7
    def test_load_data_long(self):
        path = os.path.join(settings.MEDIA_ROOT, "tests", "data_long.csv")
        with open(path, 'r') as f:
            load_data.main(f)
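This method relies on Django's settings.MEDIA_ROOT, so it presumably lives inside a Django test class; a hedged sketch of the surrounding scaffolding, with the class name assumed.

import os

from django.conf import settings
from django.test import TestCase

import load_data


class LoadDataTests(TestCase):  # class name is an assumption
    def test_load_data_long(self):
        path = os.path.join(settings.MEDIA_ROOT, "tests", "data_long.csv")
        with open(path, 'r') as f:
            load_data.main(f)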
Example #8

#SETUP
#------------------------------------------------

foldername = "lrd:" + str(lr_discriminator) + "lrg:" + str(lr_generator) + ",bsize:" + str(batch_size)
print(foldername)
if not os.path.exists(foldername):
    os.makedirs(foldername)

f = open(foldername+"/demofile3.txt", "w")
f.write(foldername)
f.close()

# Images
dataloader = load_data.main(image_size,batch_size)


# CUDA support -> if ngpu > 0 & Cuda is available
device = torch.device("cuda:0" if(
    torch.cuda.is_available() and ngpu > 0) else "cpu")
    
# Initialize the trainable params as stated in DCGAN paper (Radford, 2016)

def init_weights(model):
    classname = model.__class__.__name__
    # Convolutional layers
    if(classname.find('Conv') != -1):
        nn.init.normal_(model.weight.data, 0.0, 0.02)
    # Batchnorm layers
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(model.weight.data, 1.0, 0.02)
        nn.init.constant_(model.bias.data, 0)

                    type=int,
                    default=5,
                    help="Checkpoint epochs")
parser.add_argument('--bsize', type=int, default=128, help="Batch_size")
args = parser.parse_args()

ngpu = args.ngpu
img_size = args.i
ndf = img_size
ngf = img_size
current = args.curr
k = args.k

device = torch.device("cuda:0" if (
    torch.cuda.is_available() and ngpu > 0) else "cpu")
dataloader = load_data.main(128)

filepath = 'Training/model'
gen = models.Generator(ngpu, ngf)
dis = models.Discriminator(ngpu, ndf)
try:
    gen.load_state_dict(
        torch.load(filepath + "G_" + str(img_size) + "_" + str(current),
                   map_location=device))
    dis.load_state_dict(
        torch.load(filepath + "D_" + str(img_size) + "_" + str(current),
                   map_location=device))
except Exception:
    print(current)
    print(img_size)
    print("Model checkpoints do not exist")
def main(_):
    tf.reset_default_graph()
    # Import data
    train_data_sounds,test_data_sounds,train_data_labels,test_data_labels_og,test_data_id = load_data.main()
    print('Data Loaded')
    test_data_tag = np.arange(3750)
    img_number = len(test_data_labels_og)
    log_per_epoch = int(1000 / FLAGS.max_epochs)
    log_frequency = int(len(train_data_labels) / FLAGS.batch_size / log_per_epoch)
    if log_frequency == 0:
        log_frequency += 1
    #Data placeholders
    train_data_placeholder = tf.placeholder(tf.float32, [None, 80, 80])
    train_labels_placeholder = tf.placeholder(tf.int32, [None])
    test_data_placeholder = tf.placeholder(tf.float32, [None, 80, 80])
    test_labels_placeholder = tf.placeholder(tf.int32, [None])
    test_id_placeholder = tf.placeholder(tf.int32, [None])
    test_tag_placeholder = tf.placeholder(tf.int32, [None])
    #Train split
    n_sounds = len(train_data_labels)
    train_iterator = batch_this(train_data_placeholder,train_labels_placeholder,FLAGS.batch_size,n_sounds)
    train_batch = train_iterator.get_next()
    #Test split
    test_data_labels = test_data_labels_og
    test_iterator = batch_this(test_data_placeholder,test_labels_placeholder,FLAGS.batch_size,len(test_data_labels),1)
    test_batch = test_iterator.get_next()
    print('Data preprocessing done')
    
    with tf.variable_scope('inputs'):
        x = tf.placeholder(tf.float32, [None,80,80])
        y_ = tf.placeholder(tf.int32, [None])

    #Deep or shallow model
    if FLAGS.model_type == 'shallow':
        y_conv = shallow(x,FLAGS.initialiser)
    elif FLAGS.model_type == 'deep':
        y_conv = deep(x,FLAGS.initialiser)
    #Loss
    with tf.variable_scope('x_entropy'):
        cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    #L1 regularization
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.0001)
    all_weights = tf.trainable_variables()
    regularization_factor = tf.contrib.layers.apply_regularization(l1_regularizer, weights_list= all_weights)
    cross_entropy += regularization_factor 
    #Learning rate + Adam optimizer
    if FLAGS.learning_type == 'decay':
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,global_step ,1000,0.8)
        optimiser = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy,global_step=global_step)
    elif FLAGS.learning_type == 'normal':
        optimiser = tf.train.AdamOptimizer(FLAGS.learning_rate,beta1=0.9,beta2=0.999,epsilon=1e-08).minimize(cross_entropy)
    #Accuracy
    accuracy = tf.equal(tf.argmax(y_conv,1),tf.cast(y_, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(accuracy, tf.float32))
    #Summaries
    loss_summary = tf.summary.scalar('Loss', cross_entropy)
    acc_summary = tf.summary.scalar('Accuracy', accuracy)
    merged = tf.summary.merge([loss_summary,acc_summary])
    
    with tf.Session() as sess:
        #Summary writers
        train_writer = tf.summary.FileWriter(run_log_dir + '_train', sess.graph,flush_secs=5)
        test_writer = tf.summary.FileWriter(run_log_dir + '_test', sess.graph,flush_secs=5)
        sess.run(tf.global_variables_initializer())
        step = 0
        #TRAINING LOOP
        for epoch in range(FLAGS.max_epochs):
            #Run initializers
            sess.run(train_iterator.initializer,feed_dict={train_data_placeholder:train_data_sounds, train_labels_placeholder:train_data_labels})
            sess.run(test_iterator.initializer,feed_dict={test_data_placeholder:test_data_sounds, test_labels_placeholder:test_data_labels})
            print('Epoch:',epoch)
            while True:
                #Train optimizer for all batches, stop when out of range
                try:
                    [train_sounds,train_labels] = sess.run(train_batch)
                    _,train_summary = sess.run([optimiser,merged], feed_dict={x:train_sounds, y_:train_labels}) 
                except tf.errors.OutOfRangeError:
                    break
                #Print Accuracy on test set
                [test_sounds,test_labels] = sess.run(test_batch)
                validation_accuracy,test_summary = sess.run([accuracy,merged], feed_dict={x:test_sounds, y_:test_labels})
                if step % 170 == 0:
                    print(' step: %g,accuracy: %g' % (step,validation_accuracy))
                
                #Add summaries
                if step % log_frequency == 0:
                    train_writer.add_summary(train_summary,step)
                    test_writer.add_summary(test_summary,step)
                step+=1
        print('Training done')
        
        #EVALUATION on test set
        evaluated_images = 0
        test_accuracy = 0
        batch_count = 0
        sess.run(test_iterator.initializer,feed_dict={test_data_placeholder:test_data_sounds, test_labels_placeholder:test_data_labels})
        while evaluated_images != img_number:
            [test_sounds,test_labels] = sess.run(test_batch)
            evaluated_images += len(test_labels)
            test_accuracy_temp = sess.run(accuracy, feed_dict={x: test_sounds, y_: test_labels})
            test_accuracy += test_accuracy_temp
            batch_count += 1
        test_accuracy = test_accuracy / batch_count
        print(test_accuracy)
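The batch_this helper is not shown in this example; a minimal sketch of what it could look like with the TF 1.x tf.data API, with the signature inferred from the call sites above and the trailing argument read as a repeat count (an assumption):

def batch_this(data_ph, labels_ph, batch_size, n_items, repeat=None):
    # Hypothetical reconstruction: shuffle, optionally repeat, and batch the
    # placeholder-fed tensors, returning an initializable iterator that the
    # training loop re-initialises at the start of every epoch.
    dataset = tf.data.Dataset.from_tensor_slices((data_ph, labels_ph))
    dataset = dataset.shuffle(buffer_size=n_items)
    if repeat is not None:
        dataset = dataset.repeat(repeat)
    dataset = dataset.batch(batch_size)
    return dataset.make_initializable_iterator()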
Example #11
def weekly_update(db_choice, drop_tables_first=False):
    #TODO should update the secrets.json keys to make them simpler so that this mapping is irrelevant

    send_log = True
    debug = True

    #Run the jobs
    try:
        if drop_tables_first:
            remove_table_flag = '--remove-tables'
            tables_to_remove = 'all'
        else:
            remove_table_flag = ''
            tables_to_remove = ''

        #Get and load data in order so that we appropriately deal with duplicate records

        #Start with MAR so that we can geocode things
        arguments = get_api_data.parser.parse_args(
            [db_choice, '--modules', 'opendata', '--ids', 'mar'])
        get_api_data.get_multiple_api_sources(arguments)
        arguments = load_data.parser.parse_args([
            db_choice, '--update-only', 'mar', '--skip-calculations',
            remove_table_flag, tables_to_remove
        ])
        load_data.main(arguments)

        #prescat
        arguments = get_api_data.parser.parse_args(
            [db_choice, '--modules', 'prescat'])
        get_api_data.get_multiple_api_sources(arguments)
        arguments = load_data.parser.parse_args([
            db_choice, '--update-only', 'prescat_project', 'prescat_subsidy',
            'prescat_addre', 'prescat_reac', 'prescat_real_property',
            'prescat_parcel', '--skip-calculations'
        ])
        load_data.main(arguments)

        #then DHCD since it has better data when duplicate entries appear in DCHousing
        arguments = get_api_data.parser.parse_args(
            [db_choice, '--modules', 'dhcd'])
        get_api_data.get_multiple_api_sources(arguments)
        arguments = load_data.parser.parse_args([
            db_choice, '--update-only', 'dhcd_dfd_properties_project',
            'dhcd_dfd_properties_subsidy', 'dhcd_dfd_properties_addre',
            '--skip-calculations'
        ])
        load_data.main(arguments)

        #Then DCHousing
        arguments = get_api_data.parser.parse_args(
            [db_choice, '--modules', 'DCHousing'])
        get_api_data.get_multiple_api_sources(arguments)
        arguments = load_data.parser.parse_args([
            db_choice, '--update-only', 'dchousing_project',
            'dchousing_subsidy', 'dchousing_addre', '--skip-calculations'
        ])
        load_data.main(arguments)

        #Then everything else
        #TODO it's a little bit clunky to do it this way but to do "everything else" we'd need to modify load_data to accept a negative list
        arguments = get_api_data.parser.parse_args([
            db_choice,
            '--modules',
            'opendata',
            #TODO temporarily skipped because it's slow: 'wmata_distcalc',
            'census'
        ])

        get_api_data.get_multiple_api_sources(arguments)
        arguments = load_data.parser.parse_args([
            db_choice,
            '--update-only',
            'tract2010_ward2012',
            'tract2010_cluster2000',
            'tax',
            #'hmda_all_dc',
            'topa_rcasd_2017',
            'topa_rcasd_2016',
            'topa_rcasd_2015',
            'building_permits_2016',
            'building_permits_2017',
            'crime_2016',
            'crime_2017',
            'acs5_2015',
            'wmata_stops',
            'wmata_dist'
        ])
        load_data.main(arguments)

    except Exception as e:
        logger.error("Weekly update failed with error: %s", e)
        if debug:
            raise e

    finally:
        if send_log:
            send_log_file_to_admin(debug=debug)
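A hedged invocation sketch for the job above; the database key is a placeholder and would need to match an entry in the project's secrets.json.

if __name__ == '__main__':
    # 'remote' is a hypothetical database key, not taken from the original code.
    weekly_update('remote', drop_tables_first=False)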
Example #12
def main():
    print('## Running the %s' % load_data.__name__)
    load_data.main()
    print('## Running the %s' % train_models.__name__)
    train_models.main()
Example #13
#     def __init__(self):
#
#     def eval():
#
#     def _single_move():
#
#
# # TODO


def main(args, data, model):
    agent = random.choice(list(data.test_data))
    steps = sum([len(game[0]) for game in data.test_data[agent]])

    print('\n# Evaluate on test data')
    results = model.evaluate_generator(
        generator=data.naive_generator(32, 'test'),
        steps=steps)
    print('test loss, test acc:', results)

    # read in test data
    # forward pass with model
    # display metrics, save results


if __name__ == "__main__":
    args = parse_args.parse()
    data = load_data.main()
    model = train.main(data, args)
    main(args, data, model)
Example #14
    def test_load_data_long(self):
        path = os.path.join(settings.MEDIA_ROOT, "tests", "data_long.csv")
        with open(path, 'r') as f:
            load_data.main(f)
import load_data
import generative_models as gm
import generative_alg as ga
import classify_models as cm
import classify_alg as ca
import adverse_alg as aa
import augment

import sys
import csv
import glob
import numpy as np
import os.path

if __name__ == "__main__":
    train, dev, test, wi, glove, prem_len, hypo_len = load_data.main()
    
    method = sys.argv[1]
    version = int(sys.argv[2])
    g_hidden_size = int(sys.argv[3])
    latent_size = int(sys.argv[4])

    c_hidden_size = 150
    a_hidden_size = 150
    beam_size = 1
    batch_size = 64
    gen_epochs = 20
    div_per_premise = 64
    div_samples = 32
    augment_file_size = 2 ** 15
    aug_threshold = 0.9
    def test_main(self):
        self.assertTrue(load_data.main())
Example #17
import numpy as np
import pandas as pd
import multiprocessing as mp
import matplotlib.pyplot as plt

from machine_learning_hep.utilities import create_folder_struc, seldf_singlevar, openfile
from multiprocessing import Pool, cpu_count
from parallel_calc import split_df, parallelize_df, num_cores, num_part

from load_data import main
import time
debug = False
real_data = True
plots = False
dfreco = main(debug, real_data, plots)

#parallelized functions over the dataframe

print("parallelizing will be done with", num_cores, "cores")


def filter_phi(df):
    delta_phi_all = []
    grouped = df.groupby(["run_number", "ev_id"], sort=False)
    df["is_d"] = 0
    for name, group in grouped:
        pt_max = group["pt_cand"].idxmax()
        phi_max = df.loc[pt_max, "phi_cand"]
        df.loc[pt_max, "is_d"] = 1
        delta_phi = np.abs(phi_max - group["phi_cand"])
        delta_phi_all.extend(delta_phi)
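A hedged sketch of running the filter above; note that in this excerpt filter_phi tags the highest-pT candidate of each (run_number, ev_id) group in place via the "is_d" column, while the collected delta-phi values are not yet returned.

# Sketch only: work on a copy so the loaded dataframe stays untouched.
df_tagged = dfreco.copy()
filter_phi(df_tagged)
print("leading candidates tagged:", int(df_tagged["is_d"].sum()))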
Example #18
def main():
    querydir = "1k"
    timerfile = open('currents/' + querydir + '/time', 'w')
    inittime = time.time()
    load_data.main(timerfile, querydir)
    timerfile.write("Total loading time " + str(time.time() - inittime) + '\n\n')
    std_out = sys.stdout

    initqfulltime = time.time()
    sys.stdout = open('currents/' + querydir + '/q1out', 'w')
    q1 = getqueryargsmaybeminus(querydir + "/queries1.txt")
    for arg in q1:
        initindiv = time.time()
        query1_load_retrieve.main(arg[0], arg[1], arg[2], querydir)
        timerfile.write('Query 1 ' + arg[0] + ' ' + arg[1] + ' ' + arg[2] + ' time ' + str(time.time() - initindiv) \
                        + '\n')
    sys.stdout.close()
    timerfile.write('Total q1 time ' + str(time.time() - initqfulltime) + '\n\n')
    sys.stdout = std_out
    print "Test 1 done"

    initqfulltime = time.time()
    sys.stdout = open('currents/' + querydir + '/q2out', 'w')
    q2 = getqueryargsnominus(querydir + "/queries2.txt")
    for arg in q2:
        initindiv = time.time()
        absolute_day = datetime.date(int(arg[1]), int(arg[2]), int(arg[3])).toordinal()
        query2.main(absolute_day, arg[0])
        timerfile.write('Query 2 ' + arg[0] + ' ' + str(absolute_day) + ' time ' + str(time.time() - initindiv) \
                        + '\n')
    sys.stdout.close()
    timerfile.write('Total q2 time ' + str(time.time() - initqfulltime) + '\n\n')
    sys.stdout = std_out
    print "Test 2 done"

    initqfulltime = time.time()
    sys.stdout = open('currents/' + querydir + '/q3out', 'w')
    q3 = getqueryargsmaybeminus(querydir + "/queries3.txt")
    for arg in q3:
        initindiv = time.time()
        query3_more_opt.main(arg[0], arg[1], arg[2])
        timerfile.write('Query 3 ' + arg[0] + ' ' + arg[1] + ' ' + arg[2] + ' time ' + str(time.time() - initindiv)
                        + '\n')
    sys.stdout.close()
    timerfile.write('Total q3 time ' + str(time.time() - initqfulltime) + '\n\n')
    sys.stdout = std_out
    print "Test 3 done"

    initqfulltime = time.time()
    sys.stdout = open('currents/' + querydir + '/q4out', 'w')
    q4 = getqueryargsmaybeminus(querydir + "/queries4.txt")
    for arg in q4:
        initindiv = time.time()
        query4_more_opt.main(arg[0], arg[1], querydir)
        timerfile.write('Query 4 ' + arg[0] + ' ' + arg[1] + ' time ' + str(time.time() - initindiv) \
                        + '\n')
    sys.stdout.close()
    timerfile.write('Total q4 time ' + str(time.time() - initqfulltime) + '\n\n')
    sys.stdout = std_out
    print "Test 4 done"

    sys.stdout = open('currents/' + querydir + '/report', 'w')
    filecmp.dircmp('actuals/' + querydir, 'currents/' + querydir).report()
    sys.stdout.close()

    timerfile.close()