def addBatch(self, ID, course, faculty, students):
    """Create a new Batch from the given attributes and register it.

    The batch is populated through its setter methods and appended to
    ``self.batches``; nothing is returned.
    """
    new_batch = Batch()
    # Apply each attribute through the corresponding setter.
    for setter, value in ((new_batch.setID, ID),
                          (new_batch.setCourse, course),
                          (new_batch.setFaculty, faculty),
                          (new_batch.setStudents, students)):
        setter(value)
    self.batches.append(new_batch)
def retrieve_citations(self):
    """Work through the arXiv citation queue, extracting citations from
    each downloaded source archive and storing them via the batcher.

    NOTE(review): Python 2 code (print statements).  Shell commands are
    assembled from internal paths by concatenation — assumed trusted input;
    confirm no user-controlled paths reach here.
    """
    # Make sure the scratch directory exists.
    if not os.path.exists(self.tmp_dir):
        os.mkdir(self.tmp_dir)
    # Creates the citation queue file if it doesn't exist, by finding all
    # the .gz files in the extract folder.
    if not os.path.exists(self.citation_queue):
        call('find {source_dir}*.gz -type f > {target_file}'.format(
            source_dir=self.extract_dir, target_file=self.citation_queue),
            shell=True)
    # Initialise some variables
    batcher = Batch.Batch()
    while True:
        # Next queued file name; presumably None when the queue is
        # exhausted (fq is an external queue helper — verify).
        file_name = fq.get(self.citation_queue)
        if file_name is None:
            break
        # arXiv id = file name without directory or extension.
        arxiv_id = os.path.splitext(os.path.split(file_name)[1])[0]
        print "Retrieving citations", arxiv_id
        uncompressed_tmp = self.tmp_dir + arxiv_id
        if not os.path.exists(uncompressed_tmp):
            os.mkdir(uncompressed_tmp)
        # Try to unpack as a gzipped tarball first.
        returncode = call(
            ["tar", "xzf", file_name, "-C", uncompressed_tmp])
        if (returncode == 1):
            # there was an error, so perhaps its not a Tar file. Instead
            # try to decompress with plain old gunzip
            print "trying to gunzip instead for " + file_name
            os.system("gunzip -c %s > %s" %
                      (file_name, uncompressed_tmp + "/file.tex"))
        # Now process .tex (and .bbl bibliography) files
        for tex_file_name in os.listdir(uncompressed_tmp):
            if not (tex_file_name.endswith('.tex')
                    or tex_file_name.endswith('.bbl')):
                continue
            # Extract the citation list from the TeX source.
            citations = self.settings["metadata_reader"].process(
                arxiv_id, uncompressed_tmp + '/' + tex_file_name)
            # Store the citations in BibServer
            self.store_citations(batcher, arxiv_id, citations)
            #print "CITATIONS for " + arxiv_id
            #print citations
        # Delete temporary files; a non-zero rm exit status aborts the loop.
        if call('rm -R ' + uncompressed_tmp + '*', shell=True):
            break
        # Remove the processed entry from the queue file.
        fq.pop(self.citation_queue)
    # Flush any batched records that remain.
    batcher.clear()
def __init__(self, _modelRunTitle, run_codes, _db):
    """Set up a model run: record the title and run codes, build the
    shared Container of run state, and create the Batch runner.
    """
    # Normalised run title (``_addTitle`` is applied again below for the
    # container entry, preserving the original call pattern).
    self.modelRunTitle = self._addTitle(_modelRunTitle)
    self.run_codes = run_codes

    # Shared container used to pass run state between components.
    container = Container.Container()
    container.set('modelRunTitle', self._addTitle(_modelRunTitle))
    container.set('run_codes', run_codes)
    container.set('path', 'C:/Nonroad/%s/' % (_modelRunTitle))
    container.set('db', _db)
    # QueryRecorder logs queries under the run's scenario path, so 'path'
    # must already be set at this point.
    container.set('qr', qr.QueryRecorder(container.get('path')))
    self.cont = container

    # Batch runner driven by the populated container.
    self.batch = Batch.Batch(self.cont)
def addStudentToBatch(self, student):
    """Enrol *student* into a batch for every course they take.

    For each course returned by ``student.getCouses()`` (sic — external
    API name), the student is added to every existing batch whose
    ``courseName`` matches.  If no batch matches, a new batch is created
    for that course, the student is added to it, and it is registered in
    ``self.listBatch``.

    Bug fixed: the original created the new batch but never added the
    student to it — it attempted to redo the iteration with ``i -= 1``,
    which is a no-op inside ``for i in range(...)`` because the loop
    variable is reassigned on the next pass.  The student is now added to
    the freshly created batch directly, and the dead ``i -= 1`` removed.
    """
    for course in student.getCouses():
        # Re-fetch the batch list each iteration, as the original did,
        # so batches created for earlier courses are visible.
        matched = False
        for existing in self.getAllBatch():
            if existing.courseName == course:
                existing.addStudent(student)
                matched = True
        if not matched:
            new_batch = b.Batch()
            new_batch.courseName = course
            new_batch.addStudent(student)  # fix: actually enrol the student
            self.listBatch.append(new_batch)
def __init__(self,
             filename,
             settings=Config.importer['load']['pubmedcentral'],
             options=None):
    """Set up a PubMed Central import job.

    :param filename: path of the archive/file to import.
    :param settings: relevant configuration settings (defaults to the
        pubmedcentral loader section of the importer config).
    :param options: command-line options/arguments (list; defaults to
        an empty list — changed from a mutable default argument, which
        is shared between calls, to the ``None`` sentinel idiom).

    Creates a per-process working directory (named by a fresh uuid) under
    the configured workdir, unless ``skip_tar`` is set, in which case the
    first existing entry of the workdir is reused.
    """
    self.settings = settings  # relevant configuration settings
    self.options = options if options is not None else []
    self.filename = filename
    self.procid = uuid.uuid4().hex
    if self.settings['skip_tar']:
        # Reuse whatever extraction directory already exists.
        self.workdir = self.settings['workdir'] + os.listdir(
            self.settings['workdir'])[0] + '/'
    else:
        self.workdir = self.settings['workdir'] + self.procid + '/'
    self.b = Batch.Batch()
    self.m = MetadataReaders.MetadataReaderPMC()
    if not os.path.exists(self.settings['workdir']):
        try:
            os.makedirs(self.settings['workdir'])
        except OSError:
            # Narrowed from a bare except: another process may have
            # created the directory between the check and makedirs.
            pass
    if not os.path.exists(self.workdir):
        os.makedirs(self.workdir)
loss_record[mode][k] = loss_record_loaded[mode][k] else: loss_record = {} loss_record['train'] = Utils.Loss_Record() loss_record['val'] = Utils.Loss_Record() rate_counter = Utils.Rate_Counter() DD = Data.Data() timer = {} timer['train'] = Timer(60 * 30) timer['val'] = Timer(60 * 3) trial_loss_record = {} batch = Batch.Batch({'net': net, 'batch_size': P.BATCH_SIZE}) while True: for mode in ['train', 'val']: timer[mode].reset() while not timer[mode].check(): if mode == 'val': net.eval() else: net.train() batch['fill']({'Data': DD, 'mode': mode})
def main():
    """Top-level Keras training loop.

    Alternates full training and validation epochs indefinitely, logging
    per-epoch losses to CSV files under logs/, saving periodic model
    snapshots, and dumping weights plus loss logs on any exception.

    Fix: the validation loop logged losses against the *training* index
    counter (``data.train_index.ctr``); it now uses ``data.val_index.ctr``,
    matching the progress report printed immediately below it.
    """
    logging.basicConfig(filename='training.log', level=logging.DEBUG)
    logging.debug(ARGS)  # Log arguments
    net = SqueezeNet()
    if ARGS.resume_path is not None:
        cprint('Resuming w/ ' + ARGS.resume_path, 'yellow')
        net.model_init(ARGS.resume_path)
    else:
        net.model_init()
    net.net.summary()
    data = Data.Data()
    batch = Batch.Batch(net)

    # Maitains a list of all inputs to the network, and the loss and
    # outputs for each of these runs. This can be used to sort the data by
    # highest loss and visualize, to do so run:
    # display_sort_trial_loss(data_moment_loss_record , data)
    data_moment_loss_record = {}
    rate_counter = Utils.RateCounter()

    def run_net(data_index, mode):
        # One forward/backward pass over the next batch from data_index.
        batch.fill(data, data_index)  # Get batches ready
        batch.forward_backward(data_moment_loss_record, mode)

    try:
        epoch = 0
        avg_train_loss = Utils.LossLog()
        avg_val_loss = Utils.LossLog()
        while True:
            logging.debug('Starting training epoch #{}'.format(epoch))

            # Train mode
            epoch_train_loss = Utils.LossLog()
            print_counter = Utils.MomentCounter(ARGS.print_moments)
            while not data.train_index.epoch_complete:  # Epoch of training
                run_net(data.train_index, 'train')  # Run network, Backpropagate
                # Logging Loss
                epoch_train_loss.add(data.train_index.ctr, batch.loss)
                rate_counter.step()
                if print_counter.step(data.train_index):
                    epoch_train_loss.export_csv(
                        'logs/epoch%02d_train_loss.csv' % (epoch, ))
                    print('mode = train\n'
                          'ctr = {}\n'
                          'most recent loss = {}\n'
                          'epoch progress = {} \n'
                          'epoch = {}\n'.format(
                              data.train_index.ctr, batch.loss,
                              100. * data.train_index.ctr /
                              len(data.train_index.valid_data_moments),
                              epoch))
                    # Periodic snapshot: save weights, reset the Keras
                    # session, and reload from the snapshot file.
                    print('Save model snapshot...')
                    weights_file_name = "epoch{}_save".format(epoch)
                    Utils.save_net(weights_file_name, net, snap=True)
                    K.clear_session()
                    net.model_init(
                        os.path.join(ARGS.save_path,
                                     weights_file_name + '_snap.hdf5'))
                    if ARGS.display:
                        batch.display()
                        plt.figure('loss')
                        plt.clf()  # clears figure

            data.train_index.epoch_complete = False
            logging.info('Avg Train Loss = {}'.format(
                epoch_train_loss.average()))
            avg_train_loss.add(epoch, epoch_train_loss.average())
            avg_train_loss.export_csv('logs/avg_train_loss.csv')
            logging.debug('Finished training epoch #{}'.format(epoch))

            # Evaluate mode
            epoch_val_loss = Utils.LossLog()
            logging.debug('Starting validation epoch #{}'.format(epoch))
            print_counter = Utils.MomentCounter(ARGS.print_moments)
            while not data.val_index.epoch_complete:
                run_net(data.val_index, 'eval')  # Run network
                # BUG FIX: previously indexed by data.train_index.ctr.
                epoch_val_loss.add(data.val_index.ctr, batch.loss)
                if print_counter.step(data.val_index):
                    epoch_val_loss.export_csv('logs/epoch%02d_val_loss.csv' %
                                              (epoch, ))
                    print('mode = validation\n'
                          'ctr = {}\n'
                          'average val loss = {}\n'
                          'epoch progress = {} %\n'
                          'epoch = {}\n'.format(
                              data.val_index.ctr, epoch_val_loss.average(),
                              100. * data.val_index.ctr /
                              len(data.val_index.valid_data_moments), epoch))
            data.val_index.epoch_complete = False
            avg_val_loss.add(epoch, epoch_val_loss.average())
            avg_val_loss.export_csv('logs/avg_val_loss.csv')
            logging.debug('Finished validation epoch #{}'.format(epoch))
            logging.info('Avg Val Loss = {}'.format(epoch_val_loss.average()))
            Utils.save_net(
                "epoch%02d_save_%f" % (epoch, epoch_val_loss.average()), net)
            epoch += 1
    except Exception:
        traceback.print_exc(file=sys.stdout)
        logging.error(traceback.format_exc())  # Log exception
        # Interrupt Saves
        Utils.save_net('interrupt_save', net)
        epoch_train_loss.export_csv('logs/interrupt%02d_train_loss.csv' %
                                    (epoch, ))
        epoch_val_loss.export_csv('logs/interrupt%02d_val_loss.csv' %
                                  (epoch, ))
def train_and_model(model):
    """Train the supplied model graph, then evaluate it on test batches.

    :param model: dict of graph endpoints — "init_op", "train_op", "X",
        "Y", "Y_label", "summary", "batch_acc", "cost".
    :returns: None.

    Fix: the TensorBoard summary condition was
    ``if step % FLAGS.summary_step_size:`` — missing ``== 0`` — which
    wrote summaries on every step EXCEPT the intended multiples (and would
    re-use a stale ``summary_train``).  It now tests ``== 0`` like the
    other periodic actions.
    """
    # TODO LOOK ME this make show all tensor belong cpu or gpu
    # with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess, tf.device("/CPU:0"):
    with tf.Session() as sess:
        # checkpoint val
        saver = tf.train.Saver()
        # tensorboard
        train_writer = tf.summary.FileWriter(FLAGS.dir_train_tensorboard,
                                             sess.graph)
        test_writer = tf.summary.FileWriter(FLAGS.dir_test_tensorboard)

        # Keys requested from the batcher; constant, so hoisted out of
        # the loops.
        key_list = [Batch.INPUT_DATA, Batch.OUTPUT_LABEL, Batch.OUTPUT_DATA]

        # train step
        print("Train Start...")
        train_batch_config = Batch.Config()
        train_batch = Batch.Batch(train_batch_config)
        sess.run(model["init_op"])
        for step in range(FLAGS.max_train_step + 1):
            data = train_batch.next_batch(FLAGS.batch_size, key_list)
            feed_dict = {
                model["X"]: data[Batch.INPUT_DATA],
                model["Y"]: data[Batch.OUTPUT_DATA],
                model["Y_label"]: data[Batch.OUTPUT_LABEL]
            }
            sess.run(model["train_op"], feed_dict)

            # print log
            if step % FLAGS.print_log_step_size == 0:
                summary_train, _acc, _cost = sess.run(
                    [model["summary"], model["batch_acc"], model["cost"]],
                    feed_dict=feed_dict)
                print(datetime.datetime.utcnow(), "train step: %d" % step,
                      "batch_acc:", _acc, "cost:", _cost)

            # checkpoint
            if step % FLAGS.checkpoint_step_size == 0:
                saver.save(sess,
                           FLAGS.dir_train_checkpoint,
                           global_step=step)

            # summary tensorboard
            # BUG FIX: condition was ``step % FLAGS.summary_step_size``
            # (truthy on NON-multiples).  Note summary_train holds the
            # value from the most recent logging step (step 0 always
            # populates it, since 0 % anything == 0).
            if step % FLAGS.summary_step_size == 0:
                train_writer.add_summary(summary=summary_train,
                                         global_step=step)

        # test step
        print("Test Start...")
        test_batch_config = Batch.Config()
        test_batch = Batch.Batch(test_batch_config)
        total_acc = 0.
        for step in range(FLAGS.max_test_step + 1):
            data = test_batch.next_batch(FLAGS.batch_size, key_list)
            feed_dict = {
                model["X"]: data[Batch.INPUT_DATA],
                model["Y"]: data[Batch.OUTPUT_DATA],
                model["Y_label"]: data[Batch.OUTPUT_LABEL]
            }
            # print("input:", data[Batch.INPUT_DATA])
            # print("output:", data[Batch.OUTPUT_DATA])
            # print("label:", data[Batch.OUTPUT_LABEL])
            summary_test, _acc = sess.run(
                [model["summary"], model["batch_acc"]], feed_dict=feed_dict)
            print(datetime.datetime.utcnow(), "test step: %d" % step,
                  "batch_acc: ", _acc)
            total_acc += _acc
            if step % FLAGS.print_log_step_size == 0:
                # The original re-ran the same ops with the same feed here;
                # the values computed just above are reused instead.
                test_writer.add_summary(summary=summary_test,
                                        global_step=step)

        print("test complete: total acc =",
              total_acc / (FLAGS.max_test_step + 1))
    return
def main(): logging.basicConfig(filename='training.log', level=logging.DEBUG) logging.debug(ARGS) # Log arguments # Set Up PyTorch Environment # torch.set_default_tensor_type('torch.FloatTensor') print ARGS.no_gpu if not ARGS.no_gpu: torch.cuda.set_device(ARGS.gpu) torch.cuda.device(ARGS.gpu) if not ARGS.no_gpu: net = SqueezeNet().cuda() else: net = SqueezeNet() criterion = torch.nn.MSELoss().cuda() optimizer = torch.optim.Adadelta(net.parameters()) if ARGS.resume_path is not None: cprint('Resuming w/ ' + ARGS.resume_path, 'yellow') save_data = torch.load(ARGS.resume_path) net.load_state_dict(save_data) epoch = 0 data = None batch = Batch.Batch(net) if ARGS.bkup is not None: save_data = torch.load(ARGS.bkup) net.load_state_dict(save_data['net']) data = save_data['data'] data.get_segment_data() epoch = save_data['epoch'] else: data = Data.Data() # Maitains a list of all inputs to the network, and the loss and outputs for # each of these runs. This can be used to sort the data by highest loss and # visualize, to do so run: # display_sort_trial_loss(data_moment_loss_record , data) data_moment_loss_record = {} rate_counter = Utils.RateCounter() def run_net(data_index): batch.fill(data, data_index) # Get batches ready batch.forward(optimizer, criterion, data_moment_loss_record) try: backup1 = True avg_val_loss = Utils.LossLog() while True: logging.debug('Starting training epoch #{}'.format(epoch)) net.train() # Train mode print_counter = Utils.MomentCounter(ARGS.print_moments) save_counter = Utils.MomentCounter(ARGS.save_moments) while not data.train_index.epoch_complete: # Epoch of training run_net(data.train_index) # Run network batch.backward(optimizer) # Backpropagate # Logging Loss rate_counter.step() if save_counter.step(data.train_index): save_state = {'data' : data, 'net' : net.state_dict(), 'epoch' : epoch} if backup1: torch.save(save_state, 'backup1.bkup') backup1 = False else: torch.save(save_state, 'backup2.bkup') backup1 = True if 
print_counter.step(data.train_index): print('mode = train\n' 'ctr = {}\n' 'most recent loss = {}\n' 'epoch progress = {} \n' 'epoch = {}\n' .format(data.train_index.ctr, batch.loss.data[0], 100. * data.train_index.ctr / len(data.train_index.valid_data_moments), epoch)) if ARGS.display: batch.display() plt.figure('loss') plt.clf() # clears figure print_timer.reset() data.train_index.epoch_complete = False logging.debug('Finished training epoch #{}'.format(epoch)) logging.debug('Starting validation epoch #{}'.format(epoch)) epoch_val_loss = Utils.LossLog() print_counter = Utils.MomentCounter(ARGS.print_moments) net.eval() # Evaluate mode while not data.val_index.epoch_complete: run_net(data.val_index) # Run network epoch_val_loss.add(data.train_index.ctr, batch.loss.data[0]) if print_counter.step(data.val_index): epoch_val_loss.export_csv( 'logs/epoch%02d_val_loss.csv' % (epoch,)) print('mode = validation\n' 'ctr = {}\n' 'average val loss = {}\n' 'epoch progress = {} %\n' 'epoch = {}\n' .format(data.val_index.ctr, epoch_val_loss.average(), 100. * data.val_index.ctr / len(data.val_index.valid_data_moments), epoch)) data.val_index.epoch_complete = False avg_val_loss.add(epoch, epoch_val_loss.average()) avg_val_loss.export_csv('logs/avg_val_loss.csv') logging.debug('Finished validation epoch #{}'.format(epoch)) logging.info('Avg Val Loss = {}'.format(epoch_val_loss.average())) Utils.save_net( "epoch%02d_save_%f" % (epoch, epoch_val_loss.average()), net) epoch += 1 except Exception: logging.error(traceback.format_exc()) # Log exception # Interrupt Saves Utils.save_net('interrupt_save', net)
# NOTE(review): interior fragment of a timer-driven training loop — the
# enclosing function (which supplies net, optimizer, criterion and args)
# starts before this view.
loss_record['train'] = Utils.Loss_Record()
loss_record['val'] = Utils.Loss_Record()
rate_counter = Utils.Rate_Counter()  # tracks iteration rate
data = Data.Data()
# Wall-clock budgets: 30 minutes of training, then 3 minutes of validation.
timer = {}
timer['train'] = Timer(60*30)
timer['val'] = Timer(60*3)
print_timer = Timer(args.print_time)  # throttles console output elsewhere
save_timer = Timer(args.save_time)  # throttles checkpointing elsewhere
trial_loss_record = {}
batch = Batch.Batch(net)
# Alternate between train and validation phases indefinitely.
while True:
    for mode, data_index in [('train', data.train_index),
                             ('val', data.val_index)]:
        timer[mode].reset()
        # Run batches until this phase's time budget expires.
        while not timer[mode].check():
            batch.fill(data, data_index)  # Get batches ready
            batch.forward(optimizer, criterion, trial_loss_record)  # Run net
            if mode == 'train':  # Backpropagate
                batch.backward(optimizer)
            loss_record[mode].add(batch.loss.data[0])
            rate_counter.step()
# NOTE(review): interior fragment of a timer-driven training loop — the
# enclosing function (which supplies net, optimizer, criterion and args)
# starts before this view.
loss_record['train'] = Utils.Loss_Record()
loss_record['val'] = Utils.Loss_Record()
rate_counter = Utils.Rate_Counter()  # tracks iteration rate
data = Data.Data()
# Per-phase wall-clock budgets come from the command-line args.
timer = {}
timer['train'] = Timer(args.mini_train_time)
timer['val'] = Timer(args.mini_val_time)
print_timer = Timer(args.print_time)  # throttles console output elsewhere
save_timer = Timer(args.save_time)  # throttles checkpointing elsewhere
data_moment_loss_record = {}
batch = Batch.Batch(net)
# Alternate between train and validation phases indefinitely.
while True:
    for mode, data_index in [('train', data.train_index),
                             ('val', data.val_index)]:
        timer[mode].reset()
        # Run batches until this phase's time budget expires.
        while not timer[mode].check():
            batch.fill(data, data_index)  # Get batches ready
            batch.forward(optimizer, criterion,
                          data_moment_loss_record)  # Run net, forward pass
            if mode == 'train':  # Backpropagate
                batch.backward(optimizer)
            loss_record[mode].add(batch.loss.data[0])
            rate_counter.step()
import os, sys
import tensorflow as tf
import cv2
import gc
from tqdm import tqdm_notebook
import time
import Batch as batch
import EDM_Model as edm
import vgg.vgg19 as vgg

# Style-transfer training data: paired style/content example directories.
data = batch.Batch(Style_path="./Train_Example/Style",
                   Content_path="./Train_Example/Content")

# Opt — optimisation hyper-parameters.
learning_rate = 0.001
lambda_c = 1.  # content-loss weight
lambda_s = 5.  # style-loss weight
lambda_tv = 1e-5  # total-variation loss weight
mini_batch_size = 4
n_epoch = 1
test_check = 200  # presumably: run a test evaluation every 200 steps — verify
a_loss = 0  # running loss accumulators
test_loss = 0

# Tensor Flow GPU Memory — grow GPU allocation on demand rather than
# reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC'
config.gpu_options.allow_growth = True
tf.reset_default_graph()
def run(self): # Check that ElasticSearch is alive self.check_index() # If the user specified the --REBUILD flag, recreate the index if self.options['rebuild']: self.rebuild_index() # Connect to the repository registry = MetadataRegistry() registry.registerReader(self.settings["metadata_format"], self.settings["metadata_reader"]) client = Client(self.settings["uri"], registry) identity = client.identify() print "Connected to repository: %s" % identity.repositoryName() # got to update granularity or we barf with: # oaipmh.error.BadArgumentError: Max granularity is YYYY-MM-DD:2003-04-10T00:00:00Z client.updateGranularity() # Initialise some variables batcher = Batch.Batch() total_records = 0 start = time.time() # Now do the synchonisation # If the user specified an identifier, then synchronise this record if (self.options['identifier'] is not None): total_records += self.synchronise_record( client, batcher, self.options['identifier']) else: # Else, synchronise using the date-range provided by the user, or failing that, # the date-range based on the last sync # Get the synchronisation config record synchronisation_config = self.get_synchronisation_config() if self.options["from_date"] is not None: # If the user specified a from-date argument, use it from_date = self.options[ "from_date"] # already a date (not a datetime) elif synchronisation_config is not None and "to_date" in synchronisation_config: # Else read the last synchronised to_date from the config, and add on a day from_date = dateutil.parser.parse( synchronisation_config["to_date"]).date() + timedelta( days=1) else: # Else use the default_from_date in the config from_date = dateutil.parser.parse( self.settings['default_from_date']).date() if self.options["to_date"] is not None: to_date = self.options[ "to_date"] # already a date (not a datetime) else: to_date = (date.today() - timedelta(days=1)) # Force the from_date to use time 00:00:00 from_date = datetime.combine( from_date, _time(hour=0, minute=0, 
second=0, microsecond=0)) # Force the to_date to use time 23:59:59 to_date = datetime.combine( to_date, _time(hour=23, minute=59, second=59, microsecond=0)) print "Synchronising from %s - %s" % (from_date, to_date) while from_date < to_date: next_date = datetime.combine( from_date.date() + timedelta(days=(self.settings['delta_days'] - 1)), _time(hour=23, minute=59, second=59, microsecond=0)) number_of_records = self.synchronise_period( client, batcher, from_date, next_date) batcher.clear() #Store the records in elasticsearch self.put_synchronisation_config(from_date, next_date, number_of_records) from_date += timedelta(days=(self.settings['delta_days'])) total_records += number_of_records # Pause so as not to get banned. to = 20 print "Sleeping for %i seconds so as not to get banned." % to time.sleep(to) # Store the records in the index batcher.clear() # Print out some statistics time_spent = time.time() - start print 'Total time spent: %d seconds' % (time_spent) if time_spent > 0.001: # careful as its not an integer print 'Total records synchronised: %i records (%d records/second)' % ( total_records, (total_records / time_spent)) else: print 'Total records synchronised: %i records' % (total_records) return total_records sys.exit()