Example #1
def main(_):
    inputs = tf.placeholder(shape=[None, None, 20], dtype=tf.float32)
    labels = tf.placeholder(shape=[None, None], dtype=tf.int64)
    is_training = tf.placeholder(shape=[], dtype=tf.bool)
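    # Per-example sequence length: the number of frames with a non-zero feature sum.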
    seq_len = tf.reduce_sum(tf.cast(
        tf.not_equal(tf.reduce_sum(inputs, axis=2), 0.), tf.int32),
                            axis=1)
    global_step = tf.train.get_or_create_global_step()
    logits = wavenet.bulid_wavenet(inputs, len(utils.class_names), is_training)
    loss = tf.nn.ctc_loss(labels=labels,
                          inputs=logits,
                          sequence_length=seq_len)
    outputs, _ = tf.nn.ctc_beam_search_decoder(tf.transpose(logits,
                                                            perm=[1, 0, 2]),
                                               seq_len,
                                               merge_repeated=False)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimize = tf.train.AdamOptimizer(learning_rate=0.01).minimize(
            loss=loss, global_step=global_step)
    restore_op = utils.restore_from_pretrain(FLAGS.pretrain_dir)
    save = tf.train.Saver()
    train_dataset = dataset.create(FLAGS.train_dir)
    test_dataset = dataset.create(FLAGS.test_dir)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(restore_op)
        if len(os.listdir(FLAGS.checkpoint_dir)) > 0:
            save.restore(sess,
                         tf.train.latest_checkpoint(FLAGS.checkpoint_dir))
Example #2
def process(argv):
  if len(argv) != 2:
    sys.stderr.write("Usage: %s <filename>\n" % argv[0])
    return 1

  #password
  passwd = getpass.getpass()
  fname = argv[1]
  #process line by line
  lines = [line.strip() for line in open(fname)]
  for l in lines:
    name = os.path.splitext(l)[0]
    pretags = re.findall('[A-Z][^A-Z]*', name)
    #pull out tags already covered by core tags
    pretags.remove('Encode')
    pretags.remove('Broad')
    pretags.remove('Histone')
    pretags.remove('Pk')
    #nice name
    name = 'ENCODE ChIP-seq: ' + ' '.join(pretags)
    tags = map(lambda x: '#' + x, CORE_TAGS + pretags)
    description = ' '.join(tags) + '\n' + CORE_DESC
    args = "-u USERNAME -n '%s' -d '%s' -f downloads/%s -p True -x '%s'" % (name, description, l, passwd)
    argv = shlex.split(args)
    #create data set on Quilt 
    dataset.create(argv)
    print('%s' % l)
    #don't hammer
    time.sleep(WAIT)

  return 1
Example #3
def main():
    # Load our data and create our data set.
    print('Loading data...')
    DATA_COLUMNS = [
        'Date',
        'HH',
        'MM',
        'SS',
        'F2-F4[uV]',
        'F4-C4[uV]',
        # 'C4-P4[uV]',
        # 'P4-O2[uV]',
        # 'F1-F3[uV]',
        # 'F3-C3[uV]',
        # 'C3-P3[uV]',
        # 'P3-O1[uV]',
        'C4-A1[uV]',
        'ECG1-ECG2[uV]'
    ]
    data = signals.load('../data/n1.csv', DATA_COLUMNS)  # , 900000)

    print('Loading labels...')
    LABEL_COLUMNS = ['Time [hh:mm:ss]', 'Event']
    labels = labels_script.load('../data/n1.txt', LABEL_COLUMNS)

    print('Creating data-set...')
    data_set = dataset.create(data, labels)
    describe(data_set)

    # Convert labels from strings to integers and define the number of classes (2 in our case)
    data_set, num_classes = convert_string_to_integer(data_set)

    cross_validation(10, data_set, num_classes)
Example #4
def main(disable=0, device="cpu", cycles=100, D=32, N=128, name="evo"):

    disable = int(disable)
    cycles = int(cycles)

    print("Using device: %s" % device)

    N = int(N)
    D = int(D)

    data = dataset.create(N, D)
    test = dataset.test(N, D)

    if name == "evo":
        net = model.EvolutionaryModel(D, disable=disable).to(device)

        try:
            for i in range(cycles):
                net.do_cycle(*data, *test)
        except KeyboardInterrupt:
            pass

        best = net.select_best()
        print(best.net[0].weight.data)

        train.visualize(net.select_best(), outf="results.png", D=D)

    else:
        net = model.Model(D)
        train.train(*data, *test, net)
        print(net.net[0].weight)
Example #5
def main():
    seed_everything(args.seed)
    metanalysis = load_metanalysis(args.metaanalysis, args.metanqt,
                                   args.metanst)
    group1_sub_path, group2_sub_path = load_subject_path(args.datapath)

    train_dataloader, test_dataloader = dataset.create(args.datatype,
                                                       args.runtype, 30, True,
                                                       False, args.datapath,
                                                       group1_sub_path,
                                                       group2_sub_path)

    model = models.create(args.modeltype, 59412, 5, args.seed, xx=None)

    print(model)
Example #6
        # Update learning rate for optimizer
        self.session.run(tf.assign(self.learning_rate, new_LR))
        self.T_prev = T

    def DropAdaptation(self):
        self.T_prev = 0
        self.session.run(tf.assign(self.learning_rate, 1.0))


if __name__ == '__main__':
    BLOCK_SIZE = 8

    # Get training dataset for key expansion
    if not os.path.exists('dataset.csv'):
        x_train, y_train = dataset.create(BLOCK_SIZE, 1000)
        dataset.save((x_train, y_train), 'dataset.csv')
    else:
        x_train, y_train = dataset.load('dataset.csv')

    cipher = RNN_Cipher(False)
    cipher.Summary()
    cipher.KeyExpansion(x_train, y_train)

    # plaintext = b'Artificial neural networks (ANN) or connectionist systems are computing systems vaguely inspired by the biological neural networks that constitute animal brains.'
    plaintext = b'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAzzzzzzzzzzzzzzzzzzzz' * 10
    plaintext = pad(plaintext, BLOCK_SIZE)

    ciphertext_blocks = cipher.Encrypt(plaintext)

    plot_ciphertext(ciphertext_blocks)
Example #7
def main(_):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.device)
    utils.load(FLAGS.config_path)
    global_step = tf.train.get_or_create_global_step()
    train_dataset = dataset.create(FLAGS.dataset_path,
                                   FLAGS.batch_size,
                                   repeat=True)

    # TensorFlow bug: train_dataset[0].shape[0] != FLAGS.batch_size once in a while
    # waves = tf.reshape(tf.sparse.to_dense(train_dataset[0]), shape=[FLAGS.batch_size, -1, utils.Data.num_channel])
    waves = tf.sparse.to_dense(train_dataset[0])
    waves = tf.reshape(waves, [tf.shape(waves)[0], -1, utils.Data.num_channel])

    labels = tf.cast(train_dataset[1], tf.int32)
    sequence_length = tf.cast(train_dataset[2], tf.int32)
    logits = wavenet.bulid_wavenet(waves,
                                   len(utils.Data.vocabulary),
                                   is_training=True)
    loss = tf.reduce_mean(
        tf.nn.ctc_loss(labels, logits, sequence_length, time_major=False))

    vocabulary = tf.constant(utils.Data.vocabulary)
    decodes, _ = tf.nn.ctc_beam_search_decoder(tf.transpose(logits, [1, 0, 2]),
                                               sequence_length,
                                               merge_repeated=False)
    outputs = tf.gather(vocabulary, tf.sparse.to_dense(decodes[0]))
    labels = tf.gather(vocabulary, tf.sparse.to_dense(labels))

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimize = tf.train.AdamOptimizer(
            learning_rate=FLAGS.learning_rate).minimize(
                loss=loss, global_step=global_step)

    save = tf.train.Saver(max_to_keep=1000)
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(train_dataset[-1])
        # if os.path.exists(FLAGS.pretrain_dir) and len(os.listdir(FLAGS.pretrain_dir)) > 0:
        #   save.restore(sess, tf.train.latest_checkpoint(FLAGS.pretrain_dir))
        ckpt_dir = os.path.split(FLAGS.ckpt_path)[0]
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)
        if len(os.listdir(ckpt_dir)) > 0:
            save.restore(sess, tf.train.latest_checkpoint(ckpt_dir))

        losses, tps, preds, poses = 0, 0, 0, 0
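        # Training loop: one optimization step per iteration; metrics are accumulated and logged every FLAGS.display steps.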
        while True:
            gp, ll, uid, ot, ls, _ = sess.run(
                (global_step, labels, train_dataset[3], outputs, loss,
                 optimize))
            tp, pred, pos = utils.evalutes(utils.cvt_np2string(ot),
                                           utils.cvt_np2string(ll))
            tps += tp
            losses += ls
            preds += pred
            poses += pos
            if gp % FLAGS.display == 0:
                glog.info(
                    "Step %d: loss=%f, tp=%d, pos=%d, pred=%d, f1=%f." %
                    (gp, losses if gp == 0 else
                     (losses / FLAGS.display), tps, preds, poses, 2 * tps /
                     (preds + poses + 1e-10)))
                losses, tps, preds, poses = 0, 0, 0, 0
            if (gp + 1) % FLAGS.snapshot == 0 and gp != 0:
                save.save(sess, FLAGS.ckpt_path, global_step=global_step)
Example #8
if err != '':
    stop(err)
err = dataset.init("favorites.ds")
if err != '':
    stop(err)
err = dataset.init("characters.ds")
if err != '':
    stop(err)

#
# create, read, update and delete
#

## create
err = dataset.create("friends.ds", "frieda", {
    "name": "Little Frieda",
    "email": "*****@*****.**"
})
if err != '':
    stop(err)
err = dataset.create("friends.ds", "mojo", {
    "name": "Mojo Sam, the Yudoo Man",
    "email": "*****@*****.**"
})
if err != '':
    stop(err)
err = dataset.create("friends.ds", "jack", {
    "name": "Jack Flanders",
    "email": "*****@*****.**"
})
if err != '':
    stop(err)
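# Only "create" is shown above; a minimal sketch of the read / update / delete
# calls promised by the comment, assuming the same py_dataset API used later in
# these examples (dataset.read returning (record, err), update/delete keyed the same way):
## read
frieda, err = dataset.read("friends.ds", "frieda")
if err != '':
    stop(err)
## update
frieda["email"] = "*****@*****.**"
err = dataset.update("friends.ds", "frieda", frieda)
if err != '':
    stop(err)
## delete
dataset.delete("friends.ds", "frieda")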
Example #9
def main(_):
    utils.load(FLAGS.config_path)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.device)
    # with tf.device(FLAGS.device):
    test_dataset = dataset.create(FLAGS.dataset_path,
                                  repeat=False,
                                  batch_size=1)
    waves = tf.reshape(tf.sparse.to_dense(test_dataset[0]),
                       shape=[1, -1, utils.Data.num_channel])
    labels = tf.sparse.to_dense(test_dataset[1])
    sequence_length = tf.cast(test_dataset[2], tf.int32)
    vocabulary = tf.constant(utils.Data.vocabulary)
    labels = tf.gather(vocabulary, labels)
    logits = wavenet.bulid_wavenet(waves, len(utils.Data.vocabulary))
    decodes, _ = tf.nn.ctc_beam_search_decoder(tf.transpose(logits,
                                                            perm=[1, 0, 2]),
                                               sequence_length,
                                               merge_repeated=False)
    outputs = tf.gather(vocabulary, tf.sparse.to_dense(decodes[0]))
    save = tf.train.Saver()

    evalutes = {}
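    # Reload evaluation results saved by a previous run, if any.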
    if os.path.exists(FLAGS.ckpt_dir + '/evalute.json'):
        evalutes = json.load(
            open(FLAGS.ckpt_dir + '/evalute.json', encoding='utf-8'))

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        status = 0
        while True:
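            # Pick the newest checkpoint in ckpt_dir (largest global-step suffix) to evaluate.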
            filepaths = glob.glob(FLAGS.ckpt_dir + '/*.index')
            filepaths.sort()
            filepaths.reverse()
            filepath = filepaths[0]
            max_uid = 0
            for filepath in filepaths:
                model_path = os.path.splitext(filepath)[0]
                uid = os.path.split(model_path)[-1]
                if max_uid <= int(uid.split("-")[1]):
                    max_uid = int(uid.split("-")[1])
                    max_uid_full = uid
                    max_model_path = model_path
                    # print(max_uid)
            status = 2
            sess.run(tf.global_variables_initializer())
            sess.run(test_dataset[-1])
            save.restore(sess, max_model_path)
            # print(tf.train.latest_checkpoint(FLAGS.ckpt_dir))
            # save.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
            evalutes[max_uid_full] = {}
            tps, preds, poses, count = 0, 0, 0, 0
            while True:
                try:
                    count += 1
                    y, y_ = sess.run((labels, outputs))
                    y = utils.cvt_np2string(y)
                    y_ = utils.cvt_np2string(y_)
                    tp, pred, pos = utils.evalutes(y_, y)
                    tps += tp
                    preds += pred
                    poses += pos
                #  if count % 1000 == 0:
                #    glog.info('processed %d: tp=%d, pred=%d, pos=%d.' % (count, tps, preds, poses))
                except:
                    #  if count % 1000 != 0:
                    #    glog.info('processed %d: tp=%d, pred=%d, pos=%d.' % (count, tps, preds, poses))
                    break

            evalutes[max_uid_full]['tp'] = tps
            evalutes[max_uid_full]['pred'] = preds
            evalutes[max_uid_full]['pos'] = poses
            evalutes[max_uid_full]['f1'] = 2 * tps / (preds + poses + 1e-20)
            json.dump(
                evalutes,
                open(FLAGS.ckpt_dir + '/evalute.json',
                     mode='w',
                     encoding='utf-8'))
            evalute = evalutes[max_uid_full]
            glog.info('Evalute %s: tp=%d, pred=%d, pos=%d, f1=%f.' %
                      (max_uid_full, evalute['tp'], evalute['pred'],
                       evalute['pos'], evalute['f1']))
            if status == 1:
                time.sleep(60)
            status = 1
Example #10
def save_records(collection, records):
    for r in records:
        err = dataset.create(collection, r['UID'], r)
        if err != '':
            print("Error in saving record: " + err)
Example #11
def get_wos_refs(new=True):
    # new=True downloads everything from scratch and deletes any existing records

    collection = 'wos_refs.ds'

    if new == True:
        if os.path.exists(collection) == True:
            shutil.rmtree(collection)

    if os.path.isdir(collection) == False:
        ok = dataset.init(collection)
        if ok == False:
            print("Dataset failed to init collection")
            exit()

    #Run query to get scope of records
    token = os.environ['WOSTOK']
    headers = {'X-ApiKey': token, 'Content-type': 'application/json'}

    base_url = 'https://api.clarivate.com/api/wos/?databaseId=WOK'

    collected = dataset.has_key(collection, "captured")

    if collected == True:
        date = dataset.read(collection, "captured")
        date = date[0]['captured']
        date = datetime.fromisoformat(date)
        current = datetime.today()
        diff = (current - date)
        base_url = base_url + '&loadTimeSpan=' + str(diff.days) + 'D'

    url = base_url + '&count=1&firstRecord=1&usrQuery=OG=California%20Institute%20of%20Technology'
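    # Seed query: request a single record to learn the total record count and obtain a query ID for paging.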

    incomplete = dataset.has_key(collection, "incomplete")

    if incomplete == True:
        query = dataset.read(collection, "incomplete")
        query_id = query[0]['incomplete']
        query = dataset.read(collection, "record_start")
        record_start = query[0]['record_start']
        query = dataset.read(collection, "record_count")
        record_count = query[0]['record_count']

    else:
        response = requests.get(url, headers=headers)
        response = response.json()
        record_count = response['QueryResult']['RecordsFound']
        print(record_count)
        query_id = response['QueryResult']['QueryID']

        dataset.create(collection, 'incomplete', {"incomplete": query_id})

        record_start = 1

        dataset.create(collection, 'record_start',
                       {"record_start": record_start})
        dataset.create(collection, 'record_count',
                       {"record_count": record_count})

    query_url = 'https://api.clarivate.com/api/wos/query/'

    while record_count > 0:
        print(record_start)
        if record_count > 100:
            url = query_url + str(query_id) + '?count=100&firstRecord=' +\
                str(record_start)
            response = requests.get(url, headers=headers)
            response = response.json()
            print(response)
            save_records(collection, response['Records']['records']['REC'])
            record_start = record_start + 100
            record_count = record_count - 100
            dataset.update(collection, 'record_start',
                           {"record_start": record_start})
            dataset.update(collection, 'record_count',
                           {"record_count": record_count})
        else:
            url = query_url + str(query_id) + '?count=' + \
                str(record_count) + '&firstRecord=' + str(record_start)
            response = requests.get(url, headers=headers)
            response = response.json()
            save_records(collection, response['Records']['records']['REC'])
            record_count = 0

    date = datetime.today().isoformat()
    record = {"captured": date}
    if dataset.has_key(collection, "captured"):
        err = dataset.update(collection, 'captured', record)
        if err != "":
            print(f"Unexpected error on update: {err}")
    else:
        err = dataset.create(collection, 'captured', record)
        if err != "":
            print(f"Unexpected error on create: {err}")

    dataset.delete(collection, 'incomplete')
Example #12
    data = train_set + dev_set

    tmp = np.arange(0, len(data))
    np.random.shuffle(tmp)

    aucs = []

    for split in range(args.num_splits):
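        # Each split holds out one contiguous slice of the shuffled indices for validation and trains on the rest.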
        print(f'split {split}')
        val_indexes = tmp[split * len(data) //
                          args.num_splits:min((split + 1) * len(data) //
                                              args.num_splits, len(data))]
        train_indexes = [i for i in tmp if i not in val_indexes]
        train_set = [data[i] for i in train_indexes]
        dev_set = [data[i] for i in val_indexes]
        print('building dataloaders ...')
        train_dataloader = create(data=train_set,
                                  datatype='train',
                                  batch_size=args.train_batch_size)
        dev_dataloader = create(data=dev_set,
                                datatype='dev',
                                batch_size=args.dev_batch_size)
        print('done !')
        model = MyVisualBert()
        best_model, auc = train(train_dataloader, dev_dataloader, model,
                                args.lr, args.epochs)
        aucs.append(auc)
        torch.save(best_model, f'saved_models/cross_val_{split}.pt')

    print(f'mean auc : {np.mean(aucs)}')