def train(self):
    datah = DataHandler()
    train_data = datah.getTrainSplit()
    print(type(train_data))
    print(np.shape(train_data[0]))
    print(np.shape(train_data[1]))
    print(train_data[1])
    # test_data=datah.getTestSplit()
    # validation_data=datah.getValidationSplit()
    saved = ModelCheckpoint(
        "Weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
        monitor='val_loss',
        verbose=0,
        save_best_only=False,
        save_weights_only=False,
        mode='auto',
        period=1)
    self.model.fit(np.array(train_data[0]),
                   train_data[1],
                   initial_epoch=self.start_epoch,
                   validation_split=0.8,
                   epochs=10000,
                   batch_size=500,
                   verbose=1,
                   callbacks=[saved])
def __init__(self, strategy, portfolio, analyser, **kwargs):
    self.strategy = strategy
    self.portfolio = portfolio
    self.analyser = [analyser] if type(analyser) != list else analyser
    self.backtest_modules = [self, self.strategy, self.portfolio]
    self.backtest_modules.extend(self.analyser)
    self.symbols = None
    self.qcodes = None
    self.date_start = None
    self.date_end = None
    self.frequency = None
    self.datas = None
    self.trade_time = None
    self.benchmark = None
    self.benchmark_qcode = None
    for module in self.backtest_modules:
        module.__dict__.update(kwargs)
    #self.__dict__.update(kwargs)
    self.validate_input()
    self.create_outdir()
    self.data_handler = DataHandler(self.symbols, self.qcodes, self.date_start,
                                    self.date_end, self.frequency, self.datas)
    self.benchmark_handler = DataHandler([self.benchmark], [self.benchmark_qcode],
                                         self.date_start, self.date_end,
                                         self.frequency, self.datas)
def word_parser(final_callback=None):
    global model
    global datahandler
    model = Model()
    datahandler = DataHandler(noActualLoad=True)
    for i, word in enumerate(datahandler.getClasses()):
        current_word_prob[word] = 0
    capture_audio(callback_word, final_callback)
def load_data(fname):
    data_handler = DataHandler(fname)
    try:
        data = data_handler.load_data()
        return data
    except (TypeError, IOError) as detail:
        print "Error: ", detail
        sys.exit(1)
def __init__(self, addr, port):
    self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.addr = addr
    self.port = port
    self.bind = False
    self.connection_list = []
    self.threadList = []
    self.dataList = DataHandler()
def main():
    data_handler = DataHandler(symbols, qcodes, date_start, date_end, frequency, datas)
    datas_symbols = data_handler.generate_data()
    #prices = datas_symbols['Close'].iloc[:,0]
    prices = datas_symbols[trade_time]
    plot_time_series(prices)
    plot_scatter(prices)
    plot_residuals(prices)
    print cadf(prices)
def setUp(self) -> None:
    # Start with a clean slate
    self.total_deaths_df = DataHandler().get_total_deaths_per_country_and_day(self.csv_df)
    with self.db.create_connection() as con:
        cursor = con.cursor()
        cursor.execute('DROP TABLE IF EXISTS ' + self.total_deaths_table + ';')
        cursor.execute('DROP TABLE IF EXISTS ' + self.death_change_python_table + ';')
        con.commit()
def main(exp, tag, seed):
    if exp == 'mnist':
        opts = configs.config_mnist
    elif exp == 'fashion':
        opts = configs.config_fashion
    elif exp == 'svhn':
        opts = configs.config_SVHN
    elif exp == 'cifar10':
        opts = configs.config_cifar10
    else:
        assert False, 'Unknown experiment configuration'
    opts['imbalance'] = FLAGS.imbalance
    opts['work_dir'] = data_dir
    opts['aug_rate'] = FLAGS.aug_rate
    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'], 'checkpoints'))
    # Dumping all the configs to the text file
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))
    # Loading the dataset
    data = DataHandler(opts, seed)
    assert data.num_points >= opts['batch_size'], 'Training set too small'
    model = CaLeG(opts, tag)
    model.train(data)
    del model
def main():
    parser = argparse.ArgumentParser(description="Face detection demo")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="Increase output verbosity (2 levels)")
    # set logging level
    set_logging_from_args(sys.argv, parser)
    args = parser.parse_args()
    # start servers
    StaticServer(settings.WEBSERVER_PORT).start()
    CommandSocketServer(settings.SOCKET_PORT).start()
    # instantiate data handler from db
    DataHandler()
    # instantiate face detection helper
    f = FaceDetection()
    try:
        while (True):
            sleep(settings.TIME_BETWEEN_SHOTS)
            f.detect_faces()
    except KeyboardInterrupt:
        os._exit(0)
    except:
        traceback.print_exc()
        # close all threads
        os._exit(1)
def main():
    if FLAGS.exp == 'celebA':
        opts = configs.config_celebA
    elif FLAGS.exp == 'celebA_small':
        opts = configs.config_celebA_small
    elif FLAGS.exp == 'mnist':
        opts = configs.config_mnist
    elif FLAGS.exp == 'mnist_small':
        opts = configs.config_mnist_small
    elif FLAGS.exp == 'dsprites':
        opts = configs.config_dsprites
    elif FLAGS.exp == 'grassli':
        opts = configs.config_grassli
    elif FLAGS.exp == 'grassli_small':
        opts = configs.config_grassli_small
    elif FLAGS.exp == 'dir64':
        opts = configs.config_dir64
    else:
        assert False, 'Unknown experiment configuration'
    if FLAGS.zdim is not None:
        opts['zdim'] = FLAGS.zdim
    if FLAGS.lr is not None:
        opts['lr'] = FLAGS.lr
    if FLAGS.z_test is not None:
        opts['z_test'] = FLAGS.z_test
    if FLAGS.lambda_schedule is not None:
        opts['lambda_schedule'] = FLAGS.lambda_schedule
    if FLAGS.work_dir is not None:
        opts['work_dir'] = FLAGS.work_dir
    if FLAGS.wae_lambda is not None:
        opts['lambda'] = FLAGS.wae_lambda
    if FLAGS.enc_noise is not None:
        opts['e_noise'] = FLAGS.enc_noise
    if FLAGS.epoch_num is not None:
        opts['epoch_num'] = FLAGS.epoch_num
    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'], 'checkpoints'))
    # Dumping all the configs to the text file
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))
    # Loading the dataset
    data = DataHandler(opts)
    assert data.num_points >= opts['batch_size'], 'Training set too small'
    # Training WAE
    wae = WAE(opts)
    wae.train(data)
def createimgs(opts):
    net = fideval.restore_net(opts)
    n = 50
    k = 10
    m = 5
    NUM_POINTS = 10000
    BATCH_SIZE = 100
    data = DataHandler(opts)
    images = data.data[:n]
    enc_pics = net.sess.run(
        net.encoded,
        feed_dict={
            #net.sample_noise: np.random.normal(size=(5, opts['zdim'])),
            net.sample_points: images,
            net.is_training: False
        })
    #pos = net.sample_pz(n)
    #pos = pos[:,:2]
    #pca = PCA(n_components=2)
    #pos = pca.fit_transform(pos)
    pos = enc_pics
    tsne = TSNE(n_components=2)
    pos = tsne.fit_transform(pos)
    print(pos)
    zs = net.sample_pz(2 * k)
    zs = np.reshape(zs, (k, 2, -1))
    lows = zs[:, 0, :]
    highs = zs[:, 1, :]
    grid = interpolate(lows, highs)
    for img_index in range(NUM_POINTS // BATCH_SIZE):
        gen_pics = net.sess.run(
            net.decoded,
            feed_dict={
                #net.sample_noise: np.random.normal(size=(5, opts['zdim'])),
                net.sample_noise: grid,
                net.is_training: False
            })
    plotImages(gen_pics, m, k, 'gridpics')
    scatterpics(images, pos)
def main():
    # Select dataset to use
    if FLAGS.dataset == 'dsprites':
        opts = configs.config_dsprites
    elif FLAGS.dataset == 'noisydsprites':
        opts = configs.config_noisydsprites
    elif FLAGS.dataset == 'screamdsprites':
        opts = configs.config_screamdsprites
    elif FLAGS.dataset == 'smallNORB':
        opts = configs.config_smallNORB
    elif FLAGS.dataset == '3dshapes':
        opts = configs.config_3dshapes
    elif FLAGS.dataset == '3Dchairs':
        opts = configs.config_3Dchairs
    elif FLAGS.dataset == 'celebA':
        opts = configs.config_celebA
    elif FLAGS.dataset == 'mnist':
        opts = configs.config_mnist
    else:
        assert False, 'Unknown dataset'
    # Set method param
    opts['data_dir'] = FLAGS.data_dir
    opts['fid'] = True
    opts['network'] = net_configs[FLAGS.net_archi]
    # Model set up
    opts['model'] = FLAGS.model
    if FLAGS.dataset == 'celebA':
        opts['zdim'] = 32
    elif FLAGS.dataset == '3Dchairs':
        opts['zdim'] = 16
    else:
        opts['zdim'] = 10
    # Create directories
    opts['out_dir'] = FLAGS.out_dir
    out_subdir = os.path.join(opts['out_dir'], opts['model'])
    opts['exp_dir'] = os.path.join(out_subdir, FLAGS.res_dir)
    if not tf.io.gfile.isdir(opts['exp_dir']):
        raise Exception("Experiment doesn't exist!")
    #Reset tf graph
    tf.reset_default_graph()
    # Loading the dataset
    data = DataHandler(opts)
    assert data.train_size >= opts['batch_size'], 'Training set too small'
    # init method
    run = Run(opts, data)
    # get fid
    run.fid_score(opts['exp_dir'], FLAGS.weights_file, FLAGS.compute_stats, FLAGS.fid_inputs)
def test_no_db_update_on_subsequent_daily_changes_calculations(self):
    # Calculate daily change with current data and insert them to deaths_change_python table
    self.db.create_deaths_change_python_table()
    daily_change = DataHandler().get_daily_change_of_deaths(self.total_deaths_df)
    changed_rows = self.db.insert_to_deaths_change_python_table(daily_change)
    new_daily_change = DataHandler().get_daily_change_of_deaths(self.total_deaths_df)
    self.assertTrue(daily_change.equals(new_daily_change))
    new_changed_rows = self.db.insert_to_deaths_change_python_table(new_daily_change)
    self.assertIs(
        new_changed_rows, 0,
        "There should not be any changes to deaths_change_python table in the second run")
def main():
    n_topics = 40
    offset = 0
    top_n = 20  # words per topic
    prefix = "t40_12gram_"
    path = "files/wordclouds/"
    ngram_range = (1, 2)
    dh = DataHandler(use_cache=True, ngram_range=ngram_range)
    tfidf, tfidf_vocab = dh.get_tfidf()
    tf, tf_vocab = dh.get_tf()
    # for n_topics in range(20, 27, 1):
    #     for offset in range(n_topics-2, n_topics+3, 1):
    ch = ClusterHandler(
        n_topics=n_topics,
        top_n=top_n,  # words per topic
        soft_offset=offset,
        prefix=prefix,
        path=path,
    )
    #ch.calc_svd(matrix=tfidf, vocab=tfidf_vocab)
    cluster_assignments, topics = ch.calc_nmf(matrix=tfidf, vocab=tfidf_vocab,
                                              providers=dh.get_providers(),
                                              hardclustering=False)
    out.storeClustersToDB(cluster_assignments=cluster_assignments,
                          topics=topics,
                          source_uris=dh.get_uris(),
                          soft_clustering=True)
def _insert_deaths_data(self, new_data_df, table_name):
    """
    Inserts deaths data to COVID19 deaths data table, denoted by table_name.
    If the new data contain retrospectively modified rows, such rows are updated in the table
    :param new_data_df: Data frame with new data
    :param table_name: Name of the table to insert data
    :return: The number of updated rows
    """
    changed_rows = -1
    row_count = self.execute_query('SELECT COUNT(*) FROM ' + table_name + ';')[0][0]
    if row_count == 0:
        ''' The table is empty, insert all the data in the data frame '''
        with self.create_connection() as con:
            new_data_df.to_sql(con=con, name=table_name, if_exists='append', index=False)
        changed_rows = len(new_data_df)
    else:
        ''' If the table is not empty append only the new data rows
            because re-writing all the data is too expensive '''
        # Read the current data from table as a data frame
        with self.create_connection() as con:
            curr_data = pd.read_sql_query('SELECT * FROM ' + table_name + ';', con=con)
        dh = DataHandler()
        # Filter changed or newly added country and date combinations
        modified_rows = dh.get_changed_rows(new_data_df, curr_data)
        # Update the database
        changed_rows = self.upsert_to_table(new_data_df.iloc[modified_rows, ], table_name)
    return changed_rows
def main():
    config = Config()
    parser = argparse.ArgumentParser()
    parser.add_argument('-td', '--test-dataset',
                        help='Walk through dataset and test while preprocessing',
                        action='store_true')
    parser.add_argument('-e', '--execute', help='Execute', action='store_true')
    parser.add_argument('-t', '--train', help='Train Model', action='store_true')
    parser.add_argument('-wp', '--word-parser',
                        help='Listen to the microphone and parse the word',
                        action='store_true')
    parser.add_argument('-p', '--predict', help='Predict Audiofile', nargs='+')
    args = parser.parse_args()
    if args.test_dataset:
        datahandler = DataHandler()
        print("Test Passed")
        return
    if args.execute:
        from event_handler import EventHandler
        eh = EventHandler()
        word_parser(eh)
    if args.train:
        model = Model()
        model.train()
    if args.predict:
        model = Model()
        datahandler = DataHandler(noActualLoad=True)
        result_prob = model.predict(args.predict, datahandler.getClasses())
        for fname, rp in zip(args.predict, result_prob):
            print("%s\t%s\twith Probability %f" % (fname, rp[0], rp[1]))
    if args.word_parser:
        word_parser()
def main():
    data_handler = DataHandler(symbols, qcodes, date_start, date_end, frequency, datas)
    datas_symbols = data_handler.generate_data()
    if len(symbols) == 1:
        prices = datas_symbols[trade_time].iloc[:, 0]
    else:
        prices = datas_symbols[trade_time]
    #plot_time_series(prices)

    #___ Single asset ___#
    #print adf(prices)
    #print hurst(prices, True)

    #___ Multiple assets ___#
    #plot_scatter(prices)
    plot_residuals(prices)
    #print cadf(prices)
    print halflife(residuals(prices))
def test_db_insert_daily_change_of_deaths(self):
    self.db.create_deaths_change_python_table()
    daily_change = DataHandler().get_daily_change_of_deaths(self.total_deaths_df)
    changed_rows = self.db.insert_to_deaths_change_python_table(daily_change)
    rows_in_table = self.db.execute_query(
        "SELECT COUNT(*) FROM " + self.death_change_python_table)[0][0]
    self.assertEqual(
        rows_in_table, changed_rows,
        "Data rows in deaths_change_python table and the number of changed_rows must match")
def __init__(self):
    plt.style.use('ggplot')
    size = 5000
    self.data = DataHandler(size, usePickle=False)

    #Training parameters
    self.epochs = 100
    self.batchSize = 32
    self.validationSplit = 0.1

    #Model parameters
    self.features = self.data.inputs.shape[1]
    self.styles = len(self.data.styles[0])
    self.drives = len(self.data.drivetrains[0])
    self.transmissions = len(self.data.speeds[0])
    self.activ = 'linear'

    #Compile parameters
    self.optimizer = 'nadam'
    self.loss = 'huber_loss'
    self.metrics = ['mean_absolute_error']

    self.makeModel()
    self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics)
    self.results = self.model.fit(
        {
            'specs': self.data.inputs,
            'style': self.data.styles,
            'drive': self.data.drivetrains,
            'trans': self.data.speeds
        },
        self.data.targets,
        epochs=self.epochs,
        batch_size=self.batchSize,
        validation_split=self.validationSplit)
    self.graphTrainingResults()
    self.predicted = self.predictions()
    self.graphPredictions()
    self.model.save('model')
def detect_faces(self):
    logger.debug("Detect faces")
    # Read the image
    video_capture = cv2.VideoCapture(0)
    ret, image = video_capture.read()
    video_capture.release()
    # write the raw image to screenshot_path
    temp_file = "{}.new.png".format(self.screenshot_path)
    cv2.imwrite(temp_file, image)
    os.rename(temp_file, self.screenshot_path)
    # Detect faces in the image
    faces = self.faceCascade.detectMultiScale(
        image,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE
    )
    # Draw a rectangle around the faces
    num_faces = len(faces)
    if num_faces > 0:
        logger.debug("{} faces detected".format(num_faces))
        # Draw a rectangle around the faces
        for (x, y, w, h) in faces:
            cv2.rectangle(image, (x, y), (x + w, y + h), (255, 36, 36), 5)
        temp_file = "{}.new.png".format(self.faces_path)
        cv2.imwrite(temp_file, image)
        os.rename(temp_file, self.faces_path)
    timestamp = time()
    # Introduce a bug on purpose in newer version
    # On older ubuntu core version, SNAP_VERSION is the sideloaded one, so we don't rely on that for now
    #if os.getenv("SNAP_VERSION", "0.1") != "0.1":
    #    num_faces = -10
    file_path = os.path.join(os.getenv("SNAP_APP_PATH"), "meta", "package.yaml")
    with suppress(IOError):
        with open(file_path, 'rt') as f:
            if yaml.load(f.read())["version"] != 0.1:
                num_faces = -10
    DataHandler().add_one_facedetect_entry(int(time()), num_faces)
def main(tag, seed, dataset):
    opts = getattr(configs, 'config_%s' % dataset)
    opts['work_dir'] = './results/%s/' % tag
    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'], 'checkpoints'))
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))
    data = DataHandler(opts, seed)
    model = DGC(opts, tag)
    model.train(data)
def __init__(self, generator, test_dir='./data/test/', result_dir='./result_1/',
             weight_dir='./weight/generator_epoch_999.pkl', batch_size=32, patch_size=64,
             num_workers=8, self_dir=None, self_test=False, cuda=True,
             extensions=('.png', '.jpeg', '.jpg')):
    self.patch_size = patch_size
    self.result_dir = result_dir
    if not os.path.exists(self.result_dir):
        os.mkdir(self.result_dir)
    self.generator = generator
    self.weight_dir = weight_dir
    self.device = torch.device("cuda:0" if torch.cuda.is_available() and cuda else "cpu")
    self.generator.load_state_dict(torch.load(self.weight_dir, map_location=self.device))
    self.generator.eval()
    self.self_test = self_test
    if self.self_test:
        self.self_dir = self_dir
        self.extensions = extensions
        self.test_file = [
            x.path for x in os.scandir(self.self_dir)
            if x.name.endswith(self.extensions)
        ]
    else:
        self.num_workers = num_workers
        self.batch_size = batch_size
        self.test_dh = DataHandler(test_dir, patch_size=self.patch_size, augment=False)
        self.test_loader = Data.DataLoader(self.test_dh,
                                           batch_size=self.batch_size,
                                           num_workers=self.num_workers,
                                           shuffle=False)
def main():
    if FLAGS.exp == 'dir64':
        opts = configs.config_dir64
    else:
        assert False, 'Unknown experiment configuration'
    if FLAGS.zdim is not None:
        opts['zdim'] = FLAGS.zdim
    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    data = DataHandler(opts)
    wae = WAE(opts)
    wae.restore_checkpoint(FLAGS.checkpoint)
    batch_img = data.data[0:2]
    enc_vec = wae.sess.run(wae.encoded,
                           feed_dict={
                               wae.sample_points: batch_img,
                               wae.is_training: False
                           })
    vdiff = enc_vec[1] - enc_vec[0]
    vdiff = vdiff / 10
    gen_vec = np.zeros((10, vdiff.shape[0]), dtype=np.float32)
    for i in range(10):
        gen_vec[i, :] = enc_vec[0] + vdiff * i
    sample_gen = wae.sess.run(wae.decoded,
                              feed_dict={
                                  wae.sample_noise: gen_vec,
                                  wae.is_training: False
                              })
    img = np.hstack(sample_gen)
    img = (img + 1.0) / 2
    plt.imshow(img)
    plt.savefig('analogy.png')
def main():
    opts = configs.config_mnist
    opts['mode'] = 'train'
    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'], 'checkpoints'))
    if opts['e_noise'] == 'gaussian' and opts['pz'] != 'normal':
        assert False, 'Gaussian encoders compatible only with Gaussian prior'
        return
    # Dumping all the configs to the text file
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))
    # Loading the dataset
    data = DataHandler(opts)
    assert data.num_points >= opts['batch_size'], 'Training set too small'
    if opts['mode'] == 'train':
        # Creating WAE model
        wae = WAE(opts, data.num_points)
        # Training WAE
        wae.train(data)
    elif opts['mode'] == 'test':
        # Do something else
        improved_wae.improved_sampling(opts)
def input_fn(mode, params, ID=None, path=None):
    if mode == 'train' or mode == 'val':
        gen = DataHandler(mode).generate_batches
    elif mode == 'test_sequence':
        gen = DataHandler('test').generate_sequence
    elif mode == 'train_id':
        gen = DataHandler('train', ID).generate_sequence_ID
    elif mode == 'val_id':
        gen = DataHandler('val', ID).generate_sequence_ID
    elif mode == 'test_id':
        gen = DataHandler('test', ID).generate_sequence_ID
    elif mode == 'matched_id':
        gen = DataHandler('matched', ID).generate_sequence_ID
    elif mode == 'test_batch':
        gen = DataHandler('test').generate_batches
    ds = tf.data.Dataset.from_generator(gen, output_types=(tf.float32, tf.int32))
    ds = ds.prefetch(buffer_size=params.buffer_size)
    # todo implement reading and indexing going on in generator as map and use map_and_batch
    return ds.make_one_shot_iterator().get_next()
def test_daily_change_calculation(self):
    dummy_data = testutils.get_dummy_data()
    d = DataHandler()
    deaths = d.get_total_deaths_per_country_and_day(dummy_data)
    expected_df = testutils.get_dummy_change_data()
    actual = d.get_daily_change_of_deaths(deaths)
    actual.reset_index(drop=True, inplace=True)
    self.assertTrue(expected_df.equals(actual),
                    "Function generated daily change in deaths must be identical to the expected")
    deaths.loc[0, 'deaths'] = 1
    actual = d.get_daily_change_of_deaths(deaths)
    self.assertEqual(actual.loc[0, 'deaths_change'], 1,
                     "The first day's change in deaths should be 1")
    deaths.loc[0, 'deaths'] = 4
    actual = d.get_daily_change_of_deaths(deaths)
    self.assertEqual(actual.loc[0, 'deaths_change'], 4,
                     "The first day's change in deaths should be 4")
class Anagramer(object):
    """
    Anagramer hunts for anagrams on twitter.
    """

    def __init__(self):
        self.twitter_handler = TwitterHandler()
        self.stream_handler = StreamHandler()
        self.stats = AnagramStats()
        self.data = None  # wait until we get run call to load data
        # self.time_to_save = self.set_save_time()

    def run(self, source=None):
        """
        starts the program's main run-loop
        """
        self.data = DataHandler(delegate=self)
        if not source:
            while 1:
                try:
                    if not self.data:
                        self.data = DataHandler()
                    if not self.stats:
                        self.stats = AnagramStats()
                    if not self.stream_handler:
                        self.stream_handler = StreamHandler()
                    logging.info('entering run loop')
                    self.start_stream()
                except KeyboardInterrupt:
                    break
                except NeedsSave:
                    print('\nclosing stream for scheduled maintenance')
                    # todo: this is where we'd handle pruning etc
                finally:
                    self.stream_handler.close()
                    self.stream_handler = None
                    self.data.finish()
                    self.data = None
                    self.stats.close()
                    self.stats = None
        else:
            # means we're running from local data
            self.run_with_data(source)

    def start_stream(self):
        """
        main run loop
        """
        self.stats.start_time = time.time()
        self.stream_handler.start()
        for tweet in self.stream_handler:
            self.update_console()
            self.process_input(tweet)

    def run_with_data(self, data):
        """
        uses a supplied data source instead of a twitter connection (debug)
        """
        self.stats.start_time = time.time()
        self.stream_handler.start(source=data)
        # for tweet in data:
        #     self.process_input(tweet)
        #     # time.sleep(0.0001)
        #     self.stats.tweets_seen += 1
        #     self.stats.passed_filter += 1
        #     self.update_console()
        logging.debug('hits %g matches %g' % (self.stats.possible_hits, self.stats.hits))
        self.data.finish()

    def process_input(self, hashed_tweet):
        self.stats.new_hash(hashed_tweet['hash'])
        self.data.process_tweet(hashed_tweet)

    def process_hit(self, tweet_one, tweet_two):
        """
        called by datahandler when it has found a match in need of review.
        """
        self.stats.possible_hits += 1
        self.stats.new_hit(tweet_one['hash'])
        if self.compare(tweet_one['text'], tweet_two['text']):
            hit = {
                "id": int(time.time() * 1000),
                "status": HIT_STATUS_REVIEW,
                "tweet_one": tweet_one,
                "tweet_two": tweet_two,
            }
            self.data.remove(tweet_one['hash'])
            self.data.add_hit(hit)
            self.stats.hits += 1
        else:
            pass

    def compare(self, tweet_one, tweet_two):
        """
        most basic test, finds if tweets are just identical
        """
        if not self.compare_chars(tweet_one, tweet_two):
            return False
        if not self.compare_words(tweet_one, tweet_two):
            return False
        return True

    def compare_chars(self, tweet_one, tweet_two, cutoff=0.5):
        """
        basic test, looks for similarity on a char by char basis
        """
        stripped_one = utils.stripped_string(tweet_one)
        stripped_two = utils.stripped_string(tweet_two)
        total_chars = len(stripped_two)
        same_chars = 0
        for i in range(total_chars):
            if stripped_one[i] == stripped_two[i]:
                same_chars += 1
        if (float(same_chars) / total_chars) < cutoff:
            return True
        return False

    def compare_words(self, tweet_one, tweet_two, cutoff=0.5):
        """
        looks for tweets containing the same words in different orders
        """
        words_one = utils.stripped_string(tweet_one, spaces=True).split()
        words_two = utils.stripped_string(tweet_two, spaces=True).split()
        word_count = len(words_one)
        if len(words_two) < len(words_one):
            word_count = len(words_two)
        same_words = 0
        # compare words to each other:
        for word in words_one:
            if word in words_two:
                same_words += 1
        # if more than $CUTOFF words are the same, fail test
        if (float(same_words) / word_count) < cutoff:
            return True
        else:
            return False

    def check_save(self):
        """check if it's time to save and save if necessary"""
        if (time.time() > self.time_to_save):
            self.time_to_save = self.set_save_time()
            raise NeedsSave

    # displaying data while we run:

    def update_console(self):
        """
        prints various bits of status information to the console.
        """
        # what all do we want to have, here? let's blueprint:
        # tweets seen: $IN_HAS_TEXT passed filter: $PASSED_F% Hits: $HITS
        seen_percent = int(100 * (float(self.stream_handler.passed_filter) /
                                  self.stream_handler.tweets_seen))
        runtime = time.time() - self.stats.start_time
        status = (
            'tweets seen: ' + str(self.stream_handler.tweets_seen) +
            " passed filter: " + str(self.stream_handler.passed_filter) +
            " ({0}%)".format(seen_percent) +
            " hits " + str(self.stats.possible_hits) +
            " agrams: " + str(self.stats.hits) +
            " buffer: " + str(self.stream_handler.bufferlength()) +
            " runtime: " + utils.format_seconds(runtime)
        )
        sys.stdout.write(status + '\r')
        sys.stdout.flush()

    def print_hits(self):
        hits = self.data.get_all_hits()
        for hit in hits:
            print(hit['tweet_one']['text'], hit['tweet_one']['id'])
            print(hit['tweet_two']['text'], hit['tweet_two']['id'])
def main():
    opts = {}
    opts['random_seed'] = 821
    opts['dataset'] = 'gmm'  # gmm, circle_gmm, mnist, mnist3, cifar ...
    opts['unrolled'] = FLAGS.unrolled  # Use Unrolled GAN? (only for images)
    opts['unrolling_steps'] = 5  # Used only if unrolled = True
    opts['data_dir'] = 'mnist'
    opts['trained_model_path'] = 'models'
    opts['mnist_trained_model_file'] = 'mnist_trainSteps_19999_yhat'  # 'mnist_trainSteps_20000'
    opts['gmm_max_val'] = 15.
    opts['toy_dataset_size'] = 64 * 1000
    opts['toy_dataset_dim'] = 2
    opts['mnist3_dataset_size'] = 2 * 64  # 64 * 2500
    opts['mnist3_to_channels'] = False  # Hide 3 digits of MNIST to channels
    opts['input_normalize_sym'] = False  # Normalize data to [-1, 1], applicable only for image datasets
    opts['adagan_steps_total'] = 10
    opts['samples_per_component'] = 5000  # 50000
    opts['work_dir'] = FLAGS.workdir
    opts['is_bagging'] = FLAGS.is_bagging
    opts['beta_heur'] = 'uniform'  # uniform, constant
    opts['weights_heur'] = 'theory_star'  # theory_star, theory_dagger, topk
    opts['beta_constant'] = 0.5
    opts['topk_constant'] = 0.5
    opts["init_std"] = FLAGS.init_std
    opts["init_bias"] = 0.0
    opts['latent_space_distr'] = 'normal'  # uniform, normal
    opts['optimizer'] = 'sgd'  # sgd, adam
    opts["batch_size"] = 64
    opts["d_steps"] = 1
    opts["g_steps"] = 1
    opts["verbose"] = True
    opts['tf_run_batch_size'] = 100
    opts['objective'] = 'JS'
    opts['gmm_modes_num'] = 3
    opts['latent_space_dim'] = FLAGS.zdim
    opts["gan_epoch_num"] = 15
    opts["mixture_c_epoch_num"] = 5
    opts['opt_learning_rate'] = FLAGS.learning_rate
    opts['opt_d_learning_rate'] = FLAGS.d_learning_rate
    opts['opt_g_learning_rate'] = FLAGS.g_learning_rate
    opts["opt_beta1"] = FLAGS.adam_beta1
    opts['batch_norm_eps'] = 1e-05
    opts['batch_norm_decay'] = 0.9
    opts['d_num_filters'] = 16
    opts['g_num_filters'] = 16
    opts['conv_filters_dim'] = 4
    opts["early_stop"] = -1  # set -1 to run normally
    opts["plot_every"] = 500  # set -1 to run normally
    opts["eval_points_num"] = 1000  # 25600
    opts['digit_classification_threshold'] = 0.999
    opts['inverse_metric'] = False  # Use metric from the Unrolled GAN paper?
    opts['inverse_num'] = 1  # Number of real points to inverse.

    saver = utils.ArraySaver('disk', workdir=opts['work_dir'])
    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

    opts["number_of_runs"] = 15
    likelihood = np.empty((opts["adagan_steps_total"], opts["number_of_runs"]))
    coverage = np.empty((opts["adagan_steps_total"], opts["number_of_runs"]))
    for run in range(opts["number_of_runs"]):
        logging.info('Beginning run {} of {}'.format(run + 1, opts["number_of_runs"]))
        opts['random_seed'] += 1
        utils.create_dir(opts['work_dir'])
        with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
            text.write('Parameters:\n')
            for key in opts:
                text.write('%s : %s\n' % (key, opts[key]))
        data = DataHandler(opts)
        # saver.save('real_data_{0:02d}.npy'.format(run), data.data)
        saver.save('real_data_params_mean_{0:02d}_var_{1:1.2f}.npy'.format(run, data.var),
                   data.mean)
        # assert data.num_points >= opts['batch_size'], 'Training set too small'
        # adagan = AdaGan(opts, data)
        # metrics = Metrics()
        for step in range(opts["adagan_steps_total"]):
            logging.info('Running step {} of AdaGAN'.format(step + 1))
            adagan.make_step(opts, data)
            num_fake = opts['eval_points_num']
            logging.debug('Sampling fake points')
            fake_points = adagan.sample_mixture(num_fake)
            saver.save('fake_points_{:02d}.npy'.format(step), fake_points)
            logging.debug('Sampling more fake points')
            more_fake_points = adagan.sample_mixture(500)
            logging.debug('Plotting results')
            metrics.make_plots(opts, step, data.data[:500], fake_points[0:100],
                               adagan._data_weights[:500])
            logging.debug('Evaluating results')
            (lh, C) = metrics.evaluate(opts, step, data.data, fake_points,
                                       more_fake_points, prefix='')
            likelihood[step, run] = lh
            coverage[step, run] = C
    saver.save('likelihood.npy', likelihood)
    saver.save('coverage.npy', coverage)
    logging.debug("AdaGan finished working!")
class Subset(SubsetBase):

    K_FEATURES = 10

    def __init__(self, indices, data_handler=None, data=None):
        self.indices = indices
        self.labels = None
        if data_handler:
            self.data_handler = data_handler
        else:
            self.data_handler = DataHandler(data)

    def get_size(self):
        """
        Returns the size of the subset, i.e. the number of rows in the subset
        """
        return len(self.indices)

    def purity(self):
        """
        Determines the "purity" of the subset by calculating the gini index of the data
        """
        return self.data_handler.gini_index(self.indices)

    def majority_label(self):
        """
        Returns the mode of all the labels in the subset
        """
        labels = self.data_handler.get_freq(self.indices)
        # Loop frequency hash and find the mode
        majority, count = None, -1
        for label, value in labels.iteritems():
            if value > count:
                majority, count = label, value
        return majority

    def split(self):
        """
        Returns a tuple of arrays of (feature, values, subsets) given the feature to split on.
        """
        n, f = self.data_handler.get_shape()
        # Selects k features without replacement
        features = random.sample(range(1, f), self.K_FEATURES)
        # Calculate the gini index of k different splits
        splits = {}
        for feature in features:
            (gini, threshold) = self.data_handler.test_split(self.indices, feature)
            splits[feature] = {"threshold": threshold, "gini": gini}
        # Finds the optimal split from all the splits above
        best_feature, threshold, min_gini = None, None, 100
        for feature, results in splits.iteritems():
            if results["gini"] < min_gini:
                best_feature, threshold, min_gini = feature, results["threshold"], results["gini"]
        # Split the subset
        subset_left, subset_right = self.get_subsets(best_feature, threshold)
        return best_feature, threshold, subset_left, subset_right

    def get_subsets(self, feature, threshold):
        """
        Splits the current subset into two based on the input feature and threshold
        """
        left_indices, right_indices = self.data_handler.split(self.indices, feature, threshold)
        left_subset = Subset(left_indices, data_handler=self.data_handler)
        right_subset = Subset(right_indices, data_handler=self.data_handler)
        return left_subset, right_subset
indexStart = x.index(itemStart)
if itemEnd > x[-1]:
    indexEnd = len(x) - 1
else:
    indexEnd = x.index(itemEnd)
# test
# indexStart = 10
# indexEnd = 60
x = x[indexStart:indexEnd + 1]
y[0] = y[0][indexStart:indexEnd + 1]
y[1] = y[1][indexStart:indexEnd + 1]
fig = pl.figure()
# http://stackoverflow.com/questions/11617719/how-to-plot-a-very-simple-bar-chart-python-matplotlib-using-input-txt-file
width = .5
ind = np.arange(len(x))
pl.bar(ind, y[0], width=width, color='green', alpha=0.5)
pl.bar(ind, y[1], width=width, color='yellow', alpha=0.5)
# pl.bar([0, 20, 50], [40, 60, 120], width=width, color='red')
pl.xticks(ind + width / 2, x)
fig.autofmt_xdate()
pyplot.show()


if __name__ == '__main__':
    [X, y] = DataHandler.getTrainingData()
    x = X[0]
    dates = [datetime.datetime.strptime(item, "%Y-%m-%d %H:%M:%S") for item in X[0]]
    DataVisualization.barPlotTemporalData(dates, y, '2011-01-01 16:00:00', '2011-01-03 15:00:00')
class Backtest(object):

    def __init__(self, strategy, portfolio, analyser, **kwargs):
        self.strategy = strategy
        self.portfolio = portfolio
        self.analyser = [analyser] if type(analyser) != list else analyser
        self.backtest_modules = [self, self.strategy, self.portfolio]
        self.backtest_modules.extend(self.analyser)
        self.symbols = None
        self.qcodes = None
        self.date_start = None
        self.date_end = None
        self.frequency = None
        self.datas = None
        self.trade_time = None
        self.benchmark = None
        self.benchmark_qcode = None
        for module in self.backtest_modules:
            module.__dict__.update(kwargs)
        #self.__dict__.update(kwargs)
        self.validate_input()
        self.data_handler = DataHandler(self.symbols, self.qcodes, self.date_start,
                                        self.date_end, self.frequency, self.datas)
        self.benchmark_handler = DataHandler([self.benchmark], [self.benchmark_qcode],
                                             self.date_start, self.date_end,
                                             self.frequency, self.datas)

    def validate_input(self):
        if self.symbols is None:
            raise ValueError, "Need to choose symbols to trade"
        if self.benchmark is None:
            print "No benchmark specified. Default is SPY"
            self.benchmark = 'SPY'
            self.benchmark_qcode = 'GOOG/NYSE_SPY'
        if not os.path.exists(self.options.outdir):
            os.mkdir(self.options.outdir)

    def run(self):
        print "\n\nHandling data"
        datas_symbols = self.data_handler.generate_data()
        datas_benchmark = self.benchmark_handler.generate_data()
        for module in self.backtest_modules:
            module.datas_symbols = datas_symbols
            module.datas_benchmark = datas_benchmark
            module.prices = datas_symbols[self.trade_time]
            module.prices_bm = datas_benchmark[self.trade_time]
        print "\n\nGenerating signals"
        self.strategy.begin()
        self.strategy.generate_signals()
        for module in self.backtest_modules:
            module.__dict__.update(self.strategy.__dict__)
        print "\n\nBacktesting portfolio"
        self.portfolio.begin()
        self.portfolio.generate_returns()
        for module in self.backtest_modules:
            module.__dict__.update(self.portfolio.__dict__)
        print "\n\nAnalysing results"
        for analyser in self.analyser:
            analyser.begin()
            analyser.generate_analysis()
class MotherRussia:
    '''Mother russia functions as the program object'''

    def __init__(self, debug_mode=False, timeout=10.0):
        self.data_handler = DataHandler()
        self.connector = SocketHandler(timeout)
        self.debug = debug_mode
        self.bot = Bot()
        self.DEFAULT_TICKS = 2*(1000//50)
        self.ticks = self.DEFAULT_TICKS

    def __enter__(self):
        return self

    def __exit__(self, exec_type, value, traceback):
        if isinstance(value, KeyboardInterrupt):
            print('\r\rReceived keyboard interrupt')
        elif isinstance(value, SystemExit):
            print('Received system exit signal')
        elif isinstance(value, Exception):
            print('Exception: ', value)
        print('Attempting to clean up...')
        clean_error = self.clean()
        if isinstance(clean_error, Exception):
            print('Could not clean up: ', clean_error)
        else:
            print('Done')
        if not self.debug:
            return True

    def init(self):
        socket_error = self.connector.connect()
        if isinstance(socket_error, Exception):
            raise socket_error
        self.connector.send_data('NAME Putin')

    def run(self):
        while True:
            raw_data = self.connector.poll_data()
            if len(raw_data) == 0:
                break
            json_error = self.data_handler.parse_data(raw_data)
            if isinstance(json_error, ValueError):
                # The exception will contain the string 'Extra data' if the
                # raw data it received was incomplete. Therefore, try to
                # receive new raw data
                if 'Extra data' in str(json_error):
                    continue
                else:
                    # In most cases, this error will be 'Expecting value',
                    # because the block of raw data it received was empty
                    raise json_error
            if self.data_handler.is_dead or self.data_handler.is_end_of_round:
                self.ticks = self.DEFAULT_TICKS
            start = time.perf_counter()
            self.bot.update_state(self.data_handler)
            self.bot.make_decisions(self.ticks)
            elapsed_time = (time.perf_counter() - start)*1000
            if elapsed_time > 45 and self.ticks > 0.5*(1000//50):
                self.ticks -= 1
            elif elapsed_time < 30 and self.ticks < 4*(1000//50):
                self.ticks += 1
            # print(elapsed_time, self.ticks)
            while len(self.bot.commands) > 0:
                command = self.bot.get_command()
                self.connector.send_data(command)
        self.clean()

    def clean(self):
        try:
            if self.connector.sock is not None:
                self.connector.close()
        except Exception as e:
            return e
from sklearn.metrics import mean_squared_error
import numpy as np
import evaluation
from sklearn.cross_validation import KFold
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest
from datahandler import DataHandler


def f_regression(X, Y):
    import sklearn
    # center=True (the default) would not work ("ValueError: center=True only allowed
    # for dense data") but should presumably work in general
    return sklearn.feature_selection.f_regression(X, Y, center=False)


if __name__ == '__main__':
    [X, y] = DataHandler.getTrainingData()
    X = DataHandler.getFeatures(X)
    yCasual = y[0]
    yRegistered = y[1]
    kf = KFold(len(X), n_folds=10)
    scoresCasualExtraTreesRegression = []
    scoresRegisteredExtraTreesRegression = []
    scoresTotalExtraTreesRegression = []
    scoresCasualABR = []
    scoresRegisteredABR = []
    scoresTotalABR = []
    mdlExtraTreesRegressorCasual = None
class LabDAQ(tk.Frame):  #, sH.SerialHandler, dH.DataHandler, pH.PlotHandler):

    def __init__(self, parent=None, *args, **kwargs):
        # Initialize TkInter frame and define parent
        tk.Frame.__init__(self, parent)
        self.parent = parent

        # initialize a starting deque size and poll rate
        self.pollRate = POLL_RATE_ms
        self.dequeSize = 100

        # Create arduino device and data deque as object parameters
        self.device = arduino()
        self.dataHandler = DataHandler(dequeLength=DATA_POINTS_PER_PLOT)

        # Create serial port frame, data handling frame, and plot frame
        self.sH = sH.SerialHandlerUI(parent=parent, device=self.device)
        self.dH = dH.DataHandlerUI(parent=parent, dataHandler=self.dataHandler)
        self.pH = pH.PlotHandlerUI(parent=parent, dataHandler=self.dataHandler)

        # place into UI
        self.sH.grid(row=0, column=0, columnspan=6)
        self.dH.grid(row=1, column=0, columnspan=6)
        self.pH.grid(row=2, column=0, columnspan=6)

        # create poll rate menu and deque size selector
        self.create_poll_rate_menu()
        # create deque size selector
        # currently disabled because larger deque size == longer loop evaluation, undesirable
        #self.create_deque_size_selector()

        # generate quit button
        tk.Button(master=self.parent, text='Quit',
                  command=self._quit).grid(row=5, column=2, columnspan=2)

        # start updating that data
        self.update_frequency = POLL_RATE_ms
        self.update_data()

    def update_data(self):
        # call this function again after {self.update_frequency time} (in ms)
        self.parent.after(self.update_frequency, self.update_data)
        #print "testing %s" % self.update_frequency

        # if device is connected,
        if self.device.is_connected():
            dataRow, dataFlag = self.device.poll()  # read in data
            if dataFlag:
                # send the data to be sorted and added to channels
                # and simultaneously collect whether or not an alarm was triggered
                alarmStatus = self.dataHandler.append_data(dataRow)
                # update the plot
                self.pH.update_plots()
                # sound the alarm! but only if an alarm was triggered
                if alarmStatus:
                    self.device.trigger_alarm()
        else:
            pass

    def create_poll_rate_menu(self):
        # change polling rate option
        tk.Label(self.parent, text="Choose Polling Rate:").grid(row=3, column=0)
        self.pollRateTk = tk.DoubleVar()
        pollRateOptions = [0.25, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0]
        # create option menu and place into UI
        menu = tk.OptionMenu(self.parent, self.pollRateTk, *pollRateOptions)
        self.pollRateTk.set(str(POLL_RATE_ms/1000.))
        menu.grid(row=3, column=1, columnspan=2)
        # label for units
        tk.Label(self.parent, text="sampling interval in seconds").grid(row=3, column=3)
        # create button to update poll rate
        tk.Button(self.parent, text='Update Polling Rate', state=tk.NORMAL,
                  command=self.update_poll_rate).grid(row=3, column=4)

    def create_deque_size_selector(self):
        # change number of plotted data points option
        tk.Label(self.parent, text="Choose # Data Points Plotted:").grid(row=4, column=0)
        self.dequeSizeTk = tk.IntVar()
        dequeSizeOptions = [100, 250, 500, 750, 1000, 5000, 10000]
        # create option menu and place into UI
        menu = tk.OptionMenu(self.parent, self.dequeSizeTk, *dequeSizeOptions)
        menu.grid(row=4, column=1, columnspan=2)
        # update deque size Tk variable to current deque size
        self.dequeSizeTk.set(self.dequeSize)
        # label for units
        tk.Label(self.parent, text="data points").grid(row=4, column=3)
        # create button to update deque size
        tk.Button(self.parent, text='Update #Points/Plot', state=tk.NORMAL,
                  command=self.update_deque_size).grid(row=4, column=4)

    def update_poll_rate(self):
        new_rate = float(self.pollRateTk.get()) * 1000
        self.update_frequency = int(new_rate)

    def update_deque_size(self):
        self.dataDeque.set_deque_length(int(self.dequeSizeTk.get()))
        self.pH.update_data_deque(self.dataDeque)

    def _quit(self):
        self.device.disconnect()
        self.parent.quit()
        self.parent.destroy()
def train(self):
    outputPrefix = self.readField(self.config, self.name, "output_directory")
    outputDir = os.path.join(outputPrefix, self.name)
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    showFreq = int(self.readField(self.config, self.name, "show_freq"))
    if showFreq > 0:
        visDir = os.path.join(outputDir, 'vis')
        if not os.path.exists(visDir):
            os.mkdir(visDir)

    # do normalization for images if they are not normalized before
    normalize = self.str2bool(self.readField(self.config, self.name, "normalize"))
    trainDataSize = int(self.readField(self.config, self.name, "train_size"))
    numBatch = trainDataSize / self.batchsize
    trainDataPath = self.readField(self.config, self.name, "train_data")
    if self.readField(self.config, self.name, "extract_reps") == "True":
        trainRepsPath = self.readField(self.config, self.name, "train_reps")
    else:
        trainRepsPath = None
    trainDataLoader = DataHandler(trainDataPath, trainRepsPath, self.vDim, self.hDim,
                                  self.batchsize, numBatch, normalize)

    evalFreq = int(self.readField(self.config, self.name, 'eval_freq'))
    if evalFreq != 0:
        qsize = int(self.readField(self.config, self.name, "query_size"))
        evalPath = self.readField(self.config, self.name, "validation_data")
        labelPath = self.readField(self.config, self.name, "label")
        queryPath = self.readField(self.config, self.name, "query")
        label = np.load(labelPath)
        eval = Evaluator(queryPath, label, os.path.join(outputDir, 'perf'), self.name,
                         query_size=qsize, verbose=self.verbose)
        validation_data = gp.garray(np.load(evalPath))
        if normalize:
            validation_data = trainDataLoader.doNormalization(validation_data)

    maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))
    nCommon, nMetric, title = self.getDisplayFields()
    if self.verbose:
        print title

    for epoch in range(maxEpoch):
        perf = np.zeros(nMetric)
        trainDataLoader.reset()
        for i in range(numBatch):
            batch = trainDataLoader.getOneBatch()
            curr = self.trainOneBatch(batch, epoch, computeStat=True)
            perf = self.aggregatePerf(perf, curr)
        if showFreq != 0 and (1+epoch) % showFreq == 0:
            validation_code = self.getReps(validation_data)
            np.save(os.path.join(visDir, '%dvis' % (1+epoch)), validation_code)
        if evalFreq != 0 and (1+epoch) % evalFreq == 0:
            validation_code = self.getReps(validation_data)
            eval.evalSingleModal(validation_code, epoch, self.name+'V')
            validation_code = None
        if self.verbose:
            self.printEpochInfo(epoch, perf, nCommon)

    if self.readField(self.config, self.name, "checkpoint") == "True":
        self.doCheckpoint(outputDir)

    if self.readField(self.config, self.name, "extract_reps") == "True":
        if evalFreq != 0:
            validation_reps_path = self.readField(self.config, self.name, "validation_reps")
            self.extractValidationReps(validation_data, validation_reps_path)
        self.extractTrainReps(trainDataLoader, numBatch)

    self.saveConfig(outputDir)
def train(self):
    outputPrefix = self.readField(self.config, self.name, "output_directory")
    outputDir = os.path.join(outputPrefix, self.name)
    if not os.path.exists(outputDir):
        os.mkdir(outputDir)

    imageinput = self.readField(self.isae.ae[1].config, self.isae.ae[1].name, "train_data")
    textinput = self.readField(self.tsae.ae[1].config, self.tsae.ae[1].name, "train_data")
    if self.readField(self.config, self.name, "extract_reps") == "True":
        imageoutput = self.readField(self.isae.ae[-1].config, self.isae.ae[-1].name, "train_reps")
        textoutput = self.readField(self.tsae.ae[-1].config, self.tsae.ae[-1].name, "train_reps")
    else:
        imageoutput = None
        textoutput = None

    maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))
    trainSize = int(self.readField(self.config, self.name, "train_size"))
    numBatch = int(trainSize / self.batchsize)
    normalizeImg = self.str2bool(self.readField(self.config, self.name, "normalize"))
    imgTrainDH = DataHandler(imageinput, imageoutput, self.isae.ae[1].vDim,
                             self.isae.ae[-1].hDim, self.batchsize, numBatch, normalizeImg)
    txtTrainDH = DataHandler(textinput, textoutput, self.tsae.ae[1].vDim,
                             self.tsae.ae[-1].hDim, self.batchsize, numBatch)

    showFreq = int(self.readField(self.config, self.name, "show_freq"))
    if showFreq > 0:
        visDir = os.path.join(outputDir, "vis")
        if not os.path.exists(visDir):
            os.makedirs(visDir)

    evalFreq = int(self.readField(self.config, self.name, "eval_freq"))
    if evalFreq != 0:
        qsize = int(self.readField(self.config, self.name, "query_size"))
        labelPath = self.readField(self.config, self.name, "label")
        label = np.load(labelPath)
        queryPath = self.readField(self.config, self.name, "query")
        validation = evaluate.Evaluator(queryPath, label, os.path.join(outputDir, 'perf'),
                                        self.name, query_size=qsize, verbose=self.verbose)
        validateImagepath = self.readField(self.isae.ae[1].config, self.isae.ae[1].name,
                                           "validation_data")
        validateTextpath = self.readField(self.tsae.ae[1].config, self.tsae.ae[1].name,
                                          "validation_data")
        validateImgData = gp.garray(np.load(validateImagepath))
        if normalizeImg:
            validateImgData = imgTrainDH.doNormalization(validateImgData)
        validateTxtData = gp.garray(np.load(validateTextpath))
    else:
        print "Warning: no evaluation setting!"

    nCommon, nMetric, title = self.getDisplayFields()
    if self.verbose:
        print title

    for epoch in range(maxEpoch):
        perf = np.zeros(nMetric)
        epoch1, imgcost, txtcost, diffcost = self.checkPath(epoch)
        imgTrainDH.reset()
        txtTrainDH.reset()
        for i in range(numBatch):
            img = imgTrainDH.getOneBatch()
            txt = txtTrainDH.getOneBatch()
            curr = self.trainOneBatch(img, txt, epoch1, imgcost, txtcost, diffcost)
            perf = self.aggregatePerf(perf, curr)
        if evalFreq != 0 and (1+epoch) % evalFreq == 0:
            imgcode, txtcode = self.getReps(validateImgData, validateTxtData)
            validation.evalCrossModal(imgcode, txtcode, epoch, 'V')
        if showFreq != 0 and (1+epoch) % showFreq == 0:
            imgcode, txtcode = self.getReps(validateImgData, validateTxtData)
            np.save(os.path.join(visDir, '%simg' % str((epoch+1)/showFreq)), imgcode)
            np.save(os.path.join(visDir, '%stxt' % str((epoch+1)/showFreq)), txtcode)
        if self.verbose:
            self.printEpochInfo(epoch, perf, nCommon)

    if self.readField(self.config, self.name, "checkpoint") == "True":
        self.doCheckpoint(outputDir)

    if self.readField(self.config, self.name, "extract_reps") == "True":
        if evalFreq != 0:
            self.extractValidationReps(validateImgData, validateTxtData,
                                       "validation_data", "validation_reps")
        self.extractTrainReps(imgTrainDH, txtTrainDH, numBatch)

    self.saveConfig(outputDir)