async def run_bar(message, timeout, sertver_started, interval=0.1):
    """Animate a console spinner until a future resolves or a timeout elapses.

    Advances a MoonSpinner labelled *message* every *interval* seconds,
    stopping as soon as *sertver_started* reports done() or *timeout*
    seconds have accumulated, then finishes (clears) the spinner.

    NOTE(review): ``sertver_started`` looks like a typo for ``server_started``;
    the name is kept as-is so keyword-argument callers are not broken.
    """
    spinner = MoonSpinner(message)
    elapsed = 0
    while elapsed < timeout and not sertver_started.done():
        spinner.next()
        await asyncio.sleep(interval)
        elapsed += interval
    spinner.finish()
def build_file_db(start_dir, extensions):
    """
    Build a database of file info for every matching file under a directory.

    Files are keyed by content hash (via ``get_file_hash``), so files with
    identical contents share one bucket; each bucket is a list of
    ``{'path', 'extension'}`` dicts, one entry per duplicate/alternate copy.

    :param start_dir: directory in which to start
    :type start_dir: str
    :param extensions: collection of lower-case file suffixes of interest
    :return: mapping of file-hash -> list of {'path': str, 'extension': str}
    :rtype: dict
    """
    # guard: an empty/None extension list would silently match nothing
    if not extensions:
        sys.exit('@@@ERROR valid extension list not passed to build_file_db')
    db = {}
    spinner = MoonSpinner('Working ')  # cli spinner to indicate something is happening
    # loop over all the Paths (files) in the hierarchy starting at start_dir
    for p in get_file_list(start_dir):
        # got a file (not a dir) and filename has an extension of interest
        if p.is_file() and p.suffix.lower() in extensions:
            # setdefault collapses the old duplicated first-seen/alt branches:
            # the first hit creates the bucket, later hash matches append to it
            db.setdefault(get_file_hash(p), []).append({
                'path': str(p),
                'extension': p.suffix.lower(),
            })
        spinner.next()
    print('\n')
    return db
def get_file_extensions(start_dir):
    """
    Collect the unique (lower-cased) file extensions of all files under a
    starting directory, showing a spinner and printing a timing summary.

    todo track count of each extension type

    :param start_dir: directory in which to start
    :type start_dir: str
    :return: set of unique lower-case suffixes (e.g. {'.jpg', '.txt'})
    :rtype: set
    """
    exts = set()
    spinner = MoonSpinner('Working ')  # cli spinner to indicate something is happening
    start = time.time()
    for p in get_file_list(start_dir):
        if p.is_file():
            exts.add(p.suffix.lower())
        spinner.next()
    duration = time.time() - start
    print('\n')
    print('Completed looking for unique file extensions')
    print(f'found {len(exts)} extensions in: '
          f'{time.strftime("%H:%M:%S", time.gmtime(duration))}')
    print(f'Extensions found: {exts}')
    return exts
def main():
    """Train the UTransformer model.

    Runs ``hp.num_epochs`` epochs of gradient-tape training; every
    ``hp.num_epoch_record`` epochs it evaluates on the test set, computes the
    nino-index skill score, and saves weights under an incremented checkpoint
    filename (``ckp_0`` -> ``ckp_1`` -> ...).
    """
    train_dataset, test_dataset = train_input_fn()
    optimizer = tf.keras.optimizers.Adam(learning_rate=hp.lr)
    model = UTransformer(hp)
    model_loss = Loss(model)
    best_score = float('-inf')
    not_improved_count = 0
    checkpoint_file = hp.ckpt
    if checkpoint_file == '':
        checkpoint_file = 'ckp_0'
    else:
        # resume from an existing checkpoint
        model.load_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}')
    logger.add(f"{hp.logdir}/cmip_train.log", enqueue=True)
    for epoch in range(hp.num_epochs):
        for step, (x_batch_train, ys_batch_train) in enumerate(train_dataset):
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # supported replacement for measuring elapsed wall time.
            start = time.perf_counter()
            with tf.GradientTape() as tape:
                y_predict = model([x_batch_train, ys_batch_train],
                                  training=True)
                loss_ssim, loss_l2, loss_l1, loss = model_loss(
                    [y_predict, ys_batch_train[1]])
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            elapsed = time.perf_counter() - start
            template = ("step {} loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f}."
                        "({:1.2f}s/step)")
            logger.info(template.format(step, loss.numpy(), loss_ssim.numpy(),
                                        loss_l2.numpy(), loss_l1.numpy(),
                                        elapsed))
        if epoch % hp.num_epoch_record == 0:
            # ---- evaluation pass over the full test set ----
            loss_test = 0
            loss_ssim_test = 0
            loss_l2_test = 0
            loss_l1_test = 0
            count = 0
            y_true, y_pred = [], []
            spinner = MoonSpinner('Testing ')
            for step, (x_batch_test, ys_batch_test) in enumerate(test_dataset):
                y_predict = model([x_batch_test, ys_batch_test],
                                  training=False)
                loss_ssim, loss_l2, loss_l1, loss = model_loss(
                    [y_predict, ys_batch_test[1]])
                loss_ssim_test += loss_ssim.numpy()
                loss_l2_test += loss_l2.numpy()
                loss_l1_test += loss_l1.numpy()
                loss_test += loss.numpy()
                count += 1
                # channel 0 is reduced to a nino index sequence for scoring
                y_true.append(np.array(nino_seq(ys_batch_test[1][:, :, :, :, 0])))
                y_pred.append(np.array(nino_seq(y_predict[:, :, :, :, 0])))
                spinner.next()
            y_true = tf.concat(y_true, axis=0)
            y_pred = tf.concat(y_pred, axis=0)
            sco = score(y_true, y_pred)
            if sco > best_score:
                best_score = sco
                not_improved_count = 0
                best_state = True
            else:
                not_improved_count += 1
                best_state = False
            spinner.finish()
            logger.info("TEST COMPLETE!")
            template = ("TEST DATASET STATISTICS: "
                        "loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f},"
                        "acc skill score is {:1.5f}.")
            logger.info(template.format(loss_test / count,
                                        loss_ssim_test / count,
                                        loss_l2_test / count,
                                        loss_l1_test / count, sco))
            # raw string avoids the invalid-escape DeprecationWarning for \d
            total_epoch = int(re.findall(r"\d+", checkpoint_file)[0])
            checkpoint_file = checkpoint_file.replace(f'_{total_epoch}',
                                                      f'_{total_epoch + 1}')
            # Early stopping is currently disabled; `best_state` and
            # `not_improved_count` are kept for when it is re-enabled.
            # if not_improved_count == hp.early_stop_patience:
            #     print("Validation performance didn't improve for {} epochs. "
            #           "Training stops.".format(hp.early_stop_patience))
            #     break
            # if best_state:
            model.save_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}',
                               save_format='tf')
            logger.info("Saved checkpoint_file {}".format(checkpoint_file))
def u_convlstm_trainer():
    """Train the UConvlstm model.

    Mirrors ``main()``: gradient-tape training for ``hp.num_epochs`` epochs,
    with a full test-set evaluation and a checkpoint save (filename suffix
    incremented) every ``hp.num_epoch_record`` epochs.
    """
    train_dataset, test_dataset = train_input_fn()
    optimizer = tf.keras.optimizers.Adam(learning_rate=hp.lr)
    model = UConvlstm(hp)
    model_loss = Loss(model)
    checkpoint_file = hp.ckpt
    if checkpoint_file == '':
        checkpoint_file = 'uconvlstm-ckp_0'
    else:
        # resume from an existing checkpoint
        model.load_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}')
    logger.add(
        f"{hp.logdir}/{hp.in_seqlen}_{hp.out_seqlen}_{hp.lead_time}_train.log",
        enqueue=True)
    for epoch in range(hp.num_epochs):
        for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # supported replacement for measuring elapsed wall time.
            start = time.perf_counter()
            with tf.GradientTape() as tape:
                y_predict = model(x_batch_train, training=True)
                # debug shape checks (leftover; consider removing once stable)
                print("y_pred:", y_predict.shape)
                print("y_batch:", y_batch_train.shape)
                loss_ssim, loss_l2, loss_l1, loss = model_loss(
                    [y_predict, y_batch_train])
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            elapsed = time.perf_counter() - start
            template = ("step {} loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f}."
                        "({:1.2f}s/step)")
            logger.info(
                template.format(step, loss.numpy(), loss_ssim.numpy(),
                                loss_l2.numpy(), loss_l1.numpy(), elapsed))
        if epoch % hp.num_epoch_record == 0:
            # ---- evaluation pass over the full test set ----
            loss_test = 0
            loss_ssim_test = 0
            loss_l2_test = 0
            loss_l1_test = 0
            count = 0
            spinner = MoonSpinner('Testing ')
            for step, (x_batch_test, y_batch_test) in enumerate(test_dataset):
                y_predict = model(x_batch_test, training=False)
                loss_ssim, loss_l2, loss_l1, loss = model_loss(
                    [y_predict, y_batch_test])
                loss_ssim_test += loss_ssim.numpy()
                loss_l2_test += loss_l2.numpy()
                loss_l1_test += loss_l1.numpy()
                loss_test += loss.numpy()
                count += 1
                spinner.next()
            spinner.finish()
            logger.info("TEST COMPLETE!")
            template = ("TEST DATASET STATISTICS: "
                        "loss is {:1.5f}, "
                        "loss ssim is {:1.5f}, "
                        "loss l2 is {:1.5f}, "
                        "loss l1 is {:1.5f}.")
            logger.info(
                template.format(loss_test / count, loss_ssim_test / count,
                                loss_l2_test / count, loss_l1_test / count))
            # raw string avoids the invalid-escape DeprecationWarning for \d
            total_epoch = int(re.findall(r"\d+", checkpoint_file)[0])
            checkpoint_file = checkpoint_file.replace(f'_{total_epoch}',
                                                      f'_{total_epoch + 1}')
            model.save_weights(f'{hp.single_gpu_model_dir}/{checkpoint_file}',
                               save_format='tf')
            logger.info("Saved checkpoint_file {}".format(checkpoint_file))
# --- Script section: harvest ArchivesSpace archival objects ---------------
# NOTE(review): this section was reflowed from a collapsed source; the exact
# indentation (and hence loop nesting) is a best-guess reconstruction and
# should be confirmed against the original file.
print(connectASpace.status_code)  # echo session status code (sanity check)
# pickled list of resource records; each has at least 'title' and 'uri'
ethno = load_pickled('./data/ethno.txt')
spinner = MoonSpinner('loading the resource trees...')
state = 'loading'
ao_ids = []
# Walk each resource's tree and collect the ids of its child archival objects.
# The while/state wrapper runs the for-loop once; 'FINISHED' is set after the
# full pass over `ethno`.
while state != 'FINISHED':
    for i in ethno:
        # 'Crocodile'-titled resources are deliberately skipped
        if 'Crocodile' not in i['title']:
            tree = get_item(i['uri'] + '/tree')
            if tree['children'] != []:
                for child in tree['children']:
                    ao_ids.append(child['id'])
                    spinner.next()
            else:
                spinner.next()
        else:
            spinner.next()
    state = 'FINISHED'
spinner = Spinner('\nloading all archival objects...')
state = 'loading'
# get each archival object by id, create a list of each AO (Note: takes several minutes)
aos = []
# NOTE(review): nothing in this visible span ever sets `state` to 'FINISHED'
# for this second loop — either the assignment lies beyond this chunk or this
# is an unintentional infinite loop; verify against the full file.
while state != 'FINISHED':
    for a in ao_ids:
        aos.append(get_item('/repositories/2/archival_objects/' + str(a)))
        spinner.next()
class PageRank:
    """Compute PageRank scores for a corpus of JSON documents.

    Reads ``*.json`` docs from the settings-defined source directory, builds
    a link matrix from each doc's ``cited_in``/``refrences`` id lists, runs
    the power-iteration PageRank calculation, and writes each doc back out
    with a ``pageRank`` field added.
    """

    def __init__(self):
        # console spinner advanced during both file reads and iterations
        self.progress_bar = MoonSpinner('Calculating page ranks')

    def pageRank(self):
        """Run the full pipeline: load docs, rank them, save results."""
        sourceDirectory = settings.PAGERANK_RESOURCE_DIRECTORY
        # NOTE(review): bare names below (PAGERANK_DESTINATION_DIRECTORY,
        # PAGERANK_ALFA, PAGERANK_ERROR) are presumably star-imported from
        # settings elsewhere in the file — confirm.
        destDirectory = PAGERANK_DESTINATION_DIRECTORY
        docs = []
        id2index = {}  # doc id -> row index in the link matrix
        # read files
        for file in map(lambda x: os.path.join(sourceDirectory, x),
                        list_files(sourceDirectory, '*.json')):
            with open(file, 'r') as readFile:
                doc = json.load(readFile)
                id2index[doc['id']] = len(docs)
                self.progress_bar.next()
                docs.append(doc)
        # create links matrix: row[j] = 1 when doc links to/from doc j
        n = len(docs)
        p = []
        for doc in docs:
            row = [0] * n
            # 'refrences' is the (misspelled) key used by the data files;
            # it must not be "corrected" here.
            for linkID in set(doc['cited_in']) | set(doc['refrences']):
                if linkID in id2index:  # ignore links to docs outside corpus
                    row[id2index[linkID]] = 1
            p.append(row)
        # calculate page rank
        pr = self.pageRankMathCalculation(p, PAGERANK_ALFA, PAGERANK_ERROR)
        # save docs, each annotated with its rank
        os.makedirs(destDirectory, exist_ok=True)
        for doc, pagerank in zip(docs, pr):
            doc['pageRank'] = pagerank
            file_name = '{}.json'.format(doc['id'])
            with open(os.path.join(destDirectory, file_name), 'w') as outfile:
                json.dump(doc, outfile)

    def pageRankMathCalculation(self, p, alfa, error):
        """Power-iterate the damped transition matrix until convergence.

        :param p: n x n 0/1 link matrix (rows are normalized in place)
        :param alfa: damping/teleport weight mixed in as ``alfa * uniform``
        :param error: L1 convergence threshold between successive iterates
        :return: rank vector (numpy array of length n)
        """
        n = len(p)
        # renamed from `sum`, which shadowed the builtin
        row_sums = np.sum(p, axis=1)
        for i in range(0, n):
            if row_sums[i] == 0:
                # dangling node: jump uniformly to every doc
                p[i] = np.repeat(1 / n, n)
            else:
                p[i] = np.divide(p[i], row_sums[i])
        v1 = np.repeat(1 / n, n)
        v = np.tile(v1, [n, 1])
        # damped transition matrix: (1-alfa)*links + alfa*uniform
        p = np.add(np.dot(p, (1 - alfa)), np.dot(v, alfa))
        x = np.zeros(n)
        x[0] = 1  # start all mass on the first doc
        while True:
            prev_x = x
            x = np.dot(x, p)
            self.progress_bar.next()
            if self.calcError(prev_x, x) < error:
                self.progress_bar.finish()
                break
        return x

    def calcError(self, perv, new):
        """Return the L1 distance between successive rank vectors."""
        return sum(abs(b - a) for a, b in zip(perv, new))