def main(_):
  inputs = tf.placeholder(shape=[None, None, 20], dtype=tf.float32)
  labels = tf.placeholder(shape=[None, None], dtype=tf.int64)
  is_training = tf.placeholder(shape=[], dtype=tf.bool)
  seq_len = tf.reduce_sum(
      tf.cast(tf.not_equal(tf.reduce_sum(inputs, axis=2), 0.), tf.int32), axis=1)
  global_step = tf.train.get_or_create_global_step()
  logits = wavenet.bulid_wavenet(inputs, len(utils.class_names), is_training)
  loss = tf.nn.ctc_loss(labels=labels, inputs=logits, sequence_length=seq_len)
  outputs, _ = tf.nn.ctc_beam_search_decoder(
      tf.transpose(logits, perm=[1, 0, 2]), seq_len, merge_repeated=False)
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    optimize = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
        loss=loss, global_step=global_step)
  restore_op = utils.restore_from_pretrain(FLAGS.pretrain_dir)
  save = tf.train.Saver()
  train_dataset = dataset.create(FLAGS.train_dir)
  test_dataset = dataset.create(FLAGS.test_dir)
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(restore_op)
    if len(os.listdir(FLAGS.checkpoint_dir)) > 0:
      save.restore(sess, tf.train.latest_checkpoint(FLAGS.checkpoint_dir))

def process(argv):
    if len(argv) != 2:
        sys.stderr.write("Usage: %s <filename>\n" % argv[0])
        return 1
    # password
    passwd = getpass.getpass()
    fname = argv[1]
    # process line by line
    lines = [line.strip() for line in open(fname)]
    for l in lines:
        name = os.path.splitext(l)[0]
        pretags = re.findall('[A-Z][^A-Z]*', name)
        # pull out tags already covered by core tags
        pretags.remove('Encode')
        pretags.remove('Broad')
        pretags.remove('Histone')
        pretags.remove('Pk')
        # nice name
        name = 'ENCODE ChIP-seq: ' + ' '.join(pretags)
        tags = map(lambda x: '#' + x, CORE_TAGS + pretags)
        description = ' '.join(tags) + '\n' + CORE_DESC
        args = "-u USERNAME -n '%s' -d '%s' -f downloads/%s -p True -x '%s'" % (name, description, l, passwd)
        argv = shlex.split(args)
        # create data set on Quilt
        dataset.create(argv)
        print '%s' % (l)
        # don't hammer
        time.sleep(WAIT)
    return 1

def main():
    # Load our data and create our data set.
    print('Loading data...')
    DATA_COLUMNS = [
        'Date', 'HH', 'MM', 'SS',
        'F2-F4[uV]',
        'F4-C4[uV]',
        # 'C4-P4[uV]',
        # 'P4-O2[uV]',
        # 'F1-F3[uV]',
        # 'F3-C3[uV]',
        # 'C3-P3[uV]',
        # 'P3-O1[uV]',
        'C4-A1[uV]',
        'ECG1-ECG2[uV]'
    ]
    data = signals.load('../data/n1.csv', DATA_COLUMNS)  # , 900000)

    print('Loading labels...')
    LABEL_COLUMNS = ['Time [hh:mm:ss]', 'Event']
    labels = labels_script.load('../data/n1.txt', LABEL_COLUMNS)

    print('Creating data-set...')
    data_set = dataset.create(data, labels)
    describe(data_set)

    # Convert labels from strings to integers and define the number of classes (2 in our case)
    data_set, num_classes = convert_string_to_integer(data_set)

    cross_validation(10, data_set, num_classes)

def main(disable=0, device="cpu", cycles=100, D=32, N=128, name="evo"):
    disable = int(disable)
    cycles = int(cycles)
    print("Using device: %s" % device)
    N = int(N)
    D = int(D)
    data = dataset.create(N, D)
    test = dataset.test(N, D)
    if name == "evo":
        net = model.EvolutionaryModel(D, disable=disable).to(device)
        try:
            for i in range(cycles):
                net.do_cycle(*data, *test)
        except KeyboardInterrupt:
            pass
        best = net.select_best()
        print(best.net[0].weight.data)
        train.visualize(net.select_best(), outf="results.png", D=D)
    else:
        net = model.Model(D)
        train.train(*data, *test, net)
        print(net.net[0].weight)

def main():
    seed_everything(args.seed)
    metanalysis = load_metanalysis(args.metaanalysis, args.metanqt, args.metanst)
    group1_sub_path, group2_sub_path = load_subject_path(args.datapath)
    train_dataloader, test_dataloader = dataset.create(
        args.datatype, args.runtype, 30, True, False,
        args.datapath, group1_sub_path, group2_sub_path)
    model = models.create(args.modeltype, 59412, 5, args.seed, xx=None)
    print(model)

        # Update learning rate for optimizer
        self.session.run(tf.assign(self.learning_rate, new_LR))
        self.T_prev = T

    def DropAdaptation(self):
        self.T_prev = 0
        self.session.run(tf.assign(self.learning_rate, 1.0))


if __name__ == '__main__':
    BLOCK_SIZE = 8

    # Get training dataset for key expansion
    if not os.path.exists('dataset.csv'):
        x_train, y_train = dataset.create(BLOCK_SIZE, 1000)
        dataset.save((x_train, y_train), 'dataset.csv')
    else:
        x_train, y_train = dataset.load('dataset.csv')

    cipher = RNN_Cipher(False)
    cipher.Summary()
    cipher.KeyExpansion(x_train, y_train)

    # plaintext = b'Artificial neural networks (ANN) or connectionist systems are computing systems vaguely inspired by the biological neural networks that constitute animal brains.'
    plaintext = b'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAzzzzzzzzzzzzzzzzzzzz' * 10
    plaintext = pad(plaintext, BLOCK_SIZE)

    ciphertext_blocks = cipher.Encrypt(plaintext)
    plot_ciphertext(ciphertext_blocks)

def main(_):
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.device)
  utils.load(FLAGS.config_path)
  global_step = tf.train.get_or_create_global_step()
  train_dataset = dataset.create(FLAGS.dataset_path, FLAGS.batch_size, repeat=True)
  # bug tensorflow!!! the train_dataset[0].shape[0] != FLAGS.batch_size once in a while
  # waves = tf.reshape(tf.sparse.to_dense(train_dataset[0]),
  #                    shape=[FLAGS.batch_size, -1, utils.Data.num_channel])
  waves = tf.sparse.to_dense(train_dataset[0])
  waves = tf.reshape(waves, [tf.shape(waves)[0], -1, utils.Data.num_channel])
  labels = tf.cast(train_dataset[1], tf.int32)
  sequence_length = tf.cast(train_dataset[2], tf.int32)
  logits = wavenet.bulid_wavenet(waves, len(utils.Data.vocabulary), is_training=True)
  loss = tf.reduce_mean(
      tf.nn.ctc_loss(labels, logits, sequence_length, time_major=False))
  vocabulary = tf.constant(utils.Data.vocabulary)
  decodes, _ = tf.nn.ctc_beam_search_decoder(
      tf.transpose(logits, [1, 0, 2]), sequence_length, merge_repeated=False)
  outputs = tf.gather(vocabulary, tf.sparse.to_dense(decodes[0]))
  labels = tf.gather(vocabulary, tf.sparse.to_dense(labels))
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    optimize = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate).minimize(
        loss=loss, global_step=global_step)
  save = tf.train.Saver(max_to_keep=1000)

  config = tf.ConfigProto(allow_soft_placement=True)
  config.gpu_options.allow_growth = True
  with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_dataset[-1])
    # if os.path.exists(FLAGS.pretrain_dir) and len(os.listdir(FLAGS.pretrain_dir)) > 0:
    #   save.restore(sess, tf.train.latest_checkpoint(FLAGS.pretrain_dir))
    ckpt_dir = os.path.split(FLAGS.ckpt_path)[0]
    if not os.path.exists(ckpt_dir):
      os.makedirs(ckpt_dir)
    if len(os.listdir(ckpt_dir)) > 0:
      save.restore(sess, tf.train.latest_checkpoint(ckpt_dir))

    losses, tps, preds, poses = 0, 0, 0, 0
    while True:
      gp, ll, uid, ot, ls, _ = sess.run(
          (global_step, labels, train_dataset[3], outputs, loss, optimize))
      tp, pred, pos = utils.evalutes(utils.cvt_np2string(ot), utils.cvt_np2string(ll))
      tps += tp
      losses += ls
      preds += pred
      poses += pos
      if gp % FLAGS.display == 0:
        glog.info("Step %d: loss=%f, tp=%d, pos=%d, pred=%d, f1=%f." %
                  (gp, losses if gp == 0 else (losses / FLAGS.display),
                   tps, preds, poses, 2 * tps / (preds + poses + 1e-10)))
        losses, tps, preds, poses = 0, 0, 0, 0
      if (gp + 1) % FLAGS.snapshot == 0 and gp != 0:
        save.save(sess, FLAGS.ckpt_path, global_step=global_step)

if err != '':
    stop(err)
err = dataset.init("favorites.ds")
if err != '':
    stop(err)
err = dataset.init("characters.ds")
if err != '':
    stop(err)

#
# create, read, update and delete
#

## create
err = dataset.create("friends.ds", "frieda", {
    "name": "Little Frieda",
    "email": "*****@*****.**"
})
if err != '':
    stop(err)
err = dataset.create("friends.ds", "mojo", {
    "name": "Mojo Sam, the Yudoo Man",
    "email": "*****@*****.**"
})
if err != '':
    stop(err)
err = dataset.create("friends.ds", "jack", {
    "name": "Jack Flanders",
    "email": "*****@*****.**"
})
if err != '':
    stop(err)
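
# A minimal, hedged sketch of the "read, update and delete" steps that the
# section comment above refers to; it reuses only calls that appear in the
# other dataset examples on this page (dataset.read, dataset.update,
# dataset.delete), and the updated "name" value is illustrative, not from
# the original source. Return shapes may differ between library versions;
# dataset.read is indexed the same way as in the Web of Science example below.

## read
rec = dataset.read("friends.ds", "frieda")
frieda = rec[0]

## update
frieda["name"] = "Little Frieda (updated)"  # illustrative value
err = dataset.update("friends.ds", "frieda", frieda)
if err != '':
    stop(err)

## delete
dataset.delete("friends.ds", "jack")
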
def main(_):
  utils.load(FLAGS.config_path)
  os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.device)
  # with tf.device(FLAGS.device):
  test_dataset = dataset.create(FLAGS.dataset_path, repeat=False, batch_size=1)
  waves = tf.reshape(tf.sparse.to_dense(test_dataset[0]),
                     shape=[1, -1, utils.Data.num_channel])
  labels = tf.sparse.to_dense(test_dataset[1])
  sequence_length = tf.cast(test_dataset[2], tf.int32)
  vocabulary = tf.constant(utils.Data.vocabulary)
  labels = tf.gather(vocabulary, labels)
  logits = wavenet.bulid_wavenet(waves, len(utils.Data.vocabulary))
  decodes, _ = tf.nn.ctc_beam_search_decoder(
      tf.transpose(logits, perm=[1, 0, 2]), sequence_length, merge_repeated=False)
  outputs = tf.gather(vocabulary, tf.sparse.to_dense(decodes[0]))
  save = tf.train.Saver()

  evalutes = {}
  if os.path.exists(FLAGS.ckpt_dir + '/evalute.json'):
    evalutes = json.load(open(FLAGS.ckpt_dir + '/evalute.json', encoding='utf-8'))

  config = tf.ConfigProto(allow_soft_placement=True)
  config.gpu_options.allow_growth = True
  with tf.Session(config=config) as sess:
    status = 0
    while True:
      filepaths = glob.glob(FLAGS.ckpt_dir + '/*.index')
      filepaths.sort()
      filepaths.reverse()
      filepath = filepaths[0]
      max_uid = 0
      for filepath in filepaths:
        model_path = os.path.splitext(filepath)[0]
        uid = os.path.split(model_path)[-1]
        if max_uid <= int(uid.split("-")[1]):
          max_uid = int(uid.split("-")[1])
          max_uid_full = uid
          max_model_path = model_path
      # print(max_uid)
      status = 2
      sess.run(tf.global_variables_initializer())
      sess.run(test_dataset[-1])
      save.restore(sess, max_model_path)
      print(tf.train.latest_checkpoint(FLAGS.ckpt_dir))
      # save.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
      evalutes[max_uid_full] = {}
      tps, preds, poses, count = 0, 0, 0, 0
      while True:
        try:
          count += 1
          y, y_ = sess.run((labels, outputs))
          y = utils.cvt_np2string(y)
          y_ = utils.cvt_np2string(y_)
          tp, pred, pos = utils.evalutes(y_, y)
          tps += tp
          preds += pred
          poses += pos
          # if count % 1000 == 0:
          #   glog.info('processed %d: tp=%d, pred=%d, pos=%d.' % (count, tps, preds, poses))
        except:
          # if count % 1000 != 0:
          #   glog.info('processed %d: tp=%d, pred=%d, pos=%d.' % (count, tps, preds, poses))
          break
      evalutes[max_uid_full]['tp'] = tps
      evalutes[max_uid_full]['pred'] = preds
      evalutes[max_uid_full]['pos'] = poses
      evalutes[max_uid_full]['f1'] = 2 * tps / (preds + poses + 1e-20)
      json.dump(evalutes,
                open(FLAGS.ckpt_dir + '/evalute.json', mode='w', encoding='utf-8'))
      evalute = evalutes[max_uid_full]
      glog.info('Evalute %s: tp=%d, pred=%d, pos=%d, f1=%f.' %
                (max_uid_full, evalute['tp'], evalute['pred'], evalute['pos'], evalute['f1']))
      if status == 1:
        time.sleep(60)
      status = 1

def save_records(collection, records):
    for r in records:
        err = dataset.create(collection, r['UID'], r)
        if err != '':
            print("Error in saving record: " + err)

def get_wos_refs(new=True):
    # new=True will download everything from scratch and delete any existing records
    collection = 'wos_refs.ds'
    if new == True:
        if os.path.exists(collection) == True:
            shutil.rmtree(collection)

    if os.path.isdir(collection) == False:
        ok = dataset.init(collection)
        if ok == False:
            print("Dataset failed to init collection")
            exit()

    # Run query to get scope of records
    token = os.environ['WOSTOK']
    headers = {'X-ApiKey': token, 'Content-type': 'application/json'}

    base_url = 'https://api.clarivate.com/api/wos/?databaseId=WOK'

    collected = dataset.has_key(collection, "captured")
    if collected == True:
        date = dataset.read(collection, "captured")
        date = date[0]['captured']
        date = datetime.fromisoformat(date)
        current = datetime.today()
        diff = (current - date)
        base_url = base_url + '&loadTimeSpan=' + str(diff.days) + 'D'

    url = base_url + '&count=1&firstRecord=1&usrQuery=OG=California%20Institute%20of%20Technology'

    incomplete = dataset.has_key(collection, "incomplete")
    if incomplete == True:
        query = dataset.read(collection, "incomplete")
        query_id = query[0]['incomplete']
        query = dataset.read(collection, "record_start")
        record_start = query[0]['record_start']
        query = dataset.read(collection, "record_count")
        record_count = query[0]['record_count']
    else:
        response = requests.get(url, headers=headers)
        response = response.json()
        record_count = response['QueryResult']['RecordsFound']
        print(record_count)
        query_id = response['QueryResult']['QueryID']
        dataset.create(collection, 'incomplete', {"incomplete": query_id})
        record_start = 1
        dataset.create(collection, 'record_start', {"record_start": record_start})
        dataset.create(collection, 'record_count', {"record_count": record_count})

    query_url = 'https://api.clarivate.com/api/wos/query/'

    while record_count > 0:
        print(record_start)
        if record_count > 100:
            url = query_url + str(query_id) + '?count=100&firstRecord=' +\
                str(record_start)
            response = requests.get(url, headers=headers)
            response = response.json()
            print(response)
            save_records(collection, response['Records']['records']['REC'])
            record_start = record_start + 100
            record_count = record_count - 100
            dataset.update(collection, 'record_start', {"record_start": record_start})
            dataset.update(collection, 'record_count', {"record_count": record_count})
        else:
            url = query_url + str(query_id) + '?count=' +\
                str(record_count) + '&firstRecord=' + str(record_start)
            response = requests.get(url, headers=headers)
            response = response.json()
            save_records(collection, response['Records']['records']['REC'])
            record_count = 0

    date = datetime.today().isoformat()
    record = {"captured": date}
    if dataset.has_key(collection, "captured"):
        err = dataset.update(collection, 'captured', record)
        if err != "":
            print(f"Unexpected error on update: {err}")
    else:
        err = dataset.create(collection, 'captured', record)
        if err != "":
            print(f"Unexpected error on create: {err}")
    dataset.delete(collection, 'incomplete')

data = train_set + dev_set
tmp = np.arange(0, len(data))
np.random.shuffle(tmp)
aucs = []
for split in range(args.num_splits):
    print(f'split {split}')
    val_indexes = tmp[split * len(data) // args.num_splits:
                      min((split + 1) * len(data) // args.num_splits, len(data))]
    train_indexes = [i for i in tmp if i not in val_indexes]
    train_set = [data[i] for i in train_indexes]
    dev_set = [data[i] for i in val_indexes]
    print('building dataloaders ...')
    train_dataloader = create(data=train_set, datatype='train', batch_size=args.train_batch_size)
    dev_dataloader = create(data=dev_set, datatype='dev', batch_size=args.dev_batch_size)
    print('done !')
    model = MyVisualBert()
    best_model, auc = train(train_dataloader, dev_dataloader, model, args.lr, args.epochs)
    aucs.append(auc)
    torch.save(best_model, f'saved_models/cross_val_{split}.pt')
print(f'mean auc : {np.mean(aucs)}')