import os
import re
import ntpath

import numpy as np
import tensorflow as tf

# `fp` (project paths / helper functions), `Para` (hyper-parameters) and `full`
# are assumed to be project modules imported at module level.


def make_kvs(fname):
    """Build link_ID <-> index maps from the link info file and write them to disk.

    `fname` (path to the link info file) is taken as a parameter here; the
    original code assumed it was defined at module level.
    """
    id_set = set()
    with open(fname) as f:
        for line in f:
            # replace the separators (';', '#', newline) with spaces, then split
            spaced = re.sub('[;#\n]', ' ', line)
            for word in spaced.split(" "):
                id_set.add(word)
    # splitting leaves an empty string in the set for some reason; drop it
    # together with the header column names
    id_set.discard('')
    id_set.discard('in_links')
    id_set.discard('link_ID')
    id_set.discard('out_links')

    kvs = {}  # link_ID -> index (as string)
    vks = {}  # index (as string) -> link_ID
    for count, link_id in enumerate(id_set):
        kvs[link_id] = str(count)
        vks[str(count)] = link_id

    fp.clear(fp.kv_dir)
    with open(fp.kvs, "w+") as f:
        for link_id in kvs:
            f.write(link_id + " " + kvs[link_id] + "\n")
    with open(fp.vks, "w+") as f:
        for idx in vks:
            f.write(idx + " " + vks[idx] + "\n")
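# A minimal sketch (not part of the original module) of how the kvs/vks files
# written above can be read back: each line holds "key value", so both files
# load into plain dicts. `fp.kvs` / `fp.vks` are the same path attributes
# used by make_kvs().
def load_kv_file(path):
    """Load a 'key value' per-line mapping file into a dict."""
    mapping = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) == 2:
                mapping[parts[0]] = parts[1]
    return mapping

# Example usage (assumed paths):
#   kvs = load_kv_file(fp.kvs)   # link_ID -> index string
#   vks = load_kv_file(fp.vks)   # index string -> link_ID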
def make_matrix_day():
    """Build one (92, 24, 30) matrix per link from the time-sorted files.

    Before building the data, the whole output folder is cleared; a rather
    clumsy trick is then used to create the files. All June data is discarded
    here; other functions take care of June. Missing values are written as 0
    and fixed later by other means, and bad records should be handled the same
    way. The layout is day / hour / 2-minute slot, i.e. a matrix of shape
    92 x 24 x 30.
    """
    fp.clear(fp.matrix)
    files = [
        os.path.join(fp.time_dir, f) for f in os.listdir(fp.time_dir)
        if os.path.isfile(os.path.join(fp.time_dir, f))
    ]
    # build a matrix for every file; June data is discarded
    for tfile in files:
        M = np.zeros((92, 24, 30), dtype=np.float32)
        fname = ntpath.basename(tfile)
        fname = fname.split('.')[0] + "d.npy"
        fname = os.path.join(fp.matrix, fname)
        with open(tfile) as f:
            for line in f:
                line = re.sub("[\n]", "", line)
                words = line.split(" ")
                if words[1] == "06":  # skip June
                    continue
                # words: [link_ID, month, day, hour, minute, value]
                day = map_day(words[1], words[2])
                M[day][int(words[3])][int(words[4]) // 2] = float(words[5])
        np.save(fname, M)
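# map_day() is used above but not defined in this section. A minimal sketch of
# a plausible implementation, assuming the 92 day indices cover March, April
# and May (31 + 30 + 31 = 92 days) and that June is handled separately:
def map_day(month, day):
    """Map zero-padded (month, day) strings such as ("04", "15") to a 0-based day index."""
    offsets = {"03": 0, "04": 31, "05": 61}  # cumulative days before each month
    return offsets[month] + int(day) - 1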
def seperate_links():
    """Split the compressed training data into one file per link.

    Because the per-link files are written in append mode, everything in the
    links directory must be deleted first. If the processed output is later
    placed in the same directory, it must not use the .txt extension.
    """
    fp.clear(fp.links_dir)
    with open(fp.compressed_train_data) as f:
        for line in f:
            words = line.split(" ")
            # words[0] is the link_ID; append the record to that link's file
            link_file = os.path.join(fp.links_dir, words[0] + ".txt")
            with open(link_file, "a+") as g:
                g.write(line)
def show_data():
    """Write histogram summaries of one link's matrix for inspection in TensorBoard."""
    logdir = fp.to_dir + "/show_data"
    fp.clear(logdir)
    x = tf.placeholder(tf.float32, [30], name="x")
    tf.summary.histogram("day", x)
    train_writer = tf.summary.FileWriter(logdir)
    merged = tf.summary.merge_all()
    data = np.load(fp.matrix + "/1.npy")
    with tf.Session() as sess:
        for i in range(32 * 30):
            summary = sess.run(merged, feed_dict={x: data[:, :, i]})
            train_writer.add_summary(summary, i)
    train_writer.close()
def sort_link_bytime():
    """Sort each per-link file by time and write the result to the time directory.

    For the same reason as above (append mode), everything in the time
    directory is cleared first, then the sorted lines are written.
    """
    fp.clear(fp.time_dir)
    files = [
        os.path.join(fp.links_dir, f) for f in os.listdir(fp.links_dir)
        if os.path.isfile(os.path.join(fp.links_dir, f))
    ]
    for linkf in files:
        fname = ntpath.basename(linkf).split('.')[0] + ".time"
        timef = os.path.join(fp.time_dir, fname)
        with open(linkf) as f, open(timef, "a+") as g:
            # sorted(f) orders the lines lexicographically, which is assumed
            # to order the records by time (zero-padded date fields)
            for line in sorted(f):
                g.write(line)
def navie_full(full_data=False):
    """Baseline obtained with nothing but a simple fully connected network."""
    train_data = np.load(fp.train)
    test_data = np.load(fp.test)
    june = None
    source_data = None
    if full_data:
        whole = np.load(fp.fix_data)
        whole = whole[:, :, 6:9, :]  # keep hours 6, 7 and 8 only
        train_data = whole.reshape(132 * 92, 90)
        june = np.load(fp.June)
        source_data = june.reshape(132 * 30, 60)
    if full_data:
        assert train_data.shape == (132 * 92, 90)
    if june is not None:
        print("go on!")
    if june is None:
        print("should be False")

    fp.clear(fp.logdir)

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 60], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, 30], name='y-input')

    def weight_variable(shape):
        """Create a weight variable with appropriate initialization."""
        initial = tf.truncated_normal(shape, stddev=1)
        return tf.Variable(initial)

    def bias_variable(shape):
        """Create a bias variable with appropriate initialization."""
        initial = tf.constant(1.0, shape=shape)
        return tf.Variable(initial)

    def dense_layer(input_tensor, input_dim, output_dim, layer_name,
                    act=tf.nn.relu):
        with tf.name_scope(layer_name):
            weights = weight_variable([input_dim, output_dim])
            biases = bias_variable([output_dim])
            preactivate = tf.matmul(input_tensor, weights) + biases
            activations = act(preactivate, name='activation')
            tf.summary.histogram("activation", activations)
            return activations

    def dense_batch_relu(input_tensor, input_dim, output_dim, layer_name,
                         is_training):
        with tf.variable_scope(layer_name):
            h1 = dense_layer(input_tensor, input_dim, output_dim, layer_name,
                             act=tf.nn.relu)
            h2 = tf.contrib.layers.batch_norm(h1,
                                              decay=0.9,
                                              center=True,
                                              scale=True,
                                              updates_collections=None,
                                              is_training=is_training,
                                              reuse=None,
                                              trainable=True,
                                              scope=layer_name)
            return tf.nn.relu(h2, 'relu')

    keep_prob = tf.placeholder(tf.float32)
    is_training = tf.placeholder(tf.bool)

    hidden1 = dense_batch_relu(x, 60, 100, "layer1", is_training)
    dropped1 = tf.nn.dropout(hidden1, keep_prob)
    hidden2 = dense_batch_relu(dropped1, 100, 100, "layer2", is_training)
    dropped2 = tf.nn.dropout(hidden2, keep_prob)
    hidden3 = dense_batch_relu(dropped2, 100, 60, "layer3", is_training)
    dropped3 = tf.nn.dropout(hidden3, keep_prob)
    y = dense_layer(dropped3, 60, 30, 'layer4')

    # y_ is the ground truth, y is the prediction
    diff = tf.abs(tf.subtract(y_, y))
    ratio = tf.divide(diff, y_)
    mean = tf.reduce_sum(ratio)
    summary = tf.summary.scalar("mean", mean)
    train_step = tf.train.AdamOptimizer(
        learning_rate=Para.learningRate).minimize(mean)

    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        tf.global_variables_initializer().run()
        train_writer = tf.summary.FileWriter(fp.logdir, sess.graph)
        for i in range(Para.epoch):
            if i % 50 == 0:
                summary, res = sess.run(
                    [merged, mean],
                    feed_dict={
                        x: test_data[:, 0:60],
                        y_: test_data[:, 60:90],
                        keep_prob: 1,
                        is_training: False
                    })
                train_writer.add_summary(summary, i)
                print("{0} : {1}".format(i, res / 71280))
            np.random.shuffle(train_data)
            train_sample = train_data[0:Para.batch_size]
            train_x = train_sample[:, 0:60]
            train_y = train_sample[:, 60:90]
            sess.run(train_step,
                     feed_dict={
                         x: train_x,
                         y_: train_y,
                         keep_prob: 0.6,
                         is_training: True
                     })
        if full_data:
            res_june = sess.run(y,
                                feed_dict={
                                    x: source_data,
                                    keep_prob: 1.0,
                                    is_training: False
                                })
            res_june = res_june.reshape(132, 30, 30)
            write_to_file(res_june)
        train_writer.close()
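# The quantity printed every 50 steps above is a mean-absolute-percentage-error
# style metric: |y_true - y_pred| / y_true is summed over the whole test set
# and then divided by a fixed count (71280, presumably the number of test
# entries). A minimal numpy sketch (not part of the original module) of the
# same computation:
def mape_sum(y_true, y_pred):
    """Sum of |y_true - y_pred| / y_true over all entries of two numpy arrays."""
    return np.sum(np.abs(y_true - y_pred) / y_true)

# Example: mape_sum(test_y, pred_y) / test_y.size gives the mean ratio.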
def naive_conv(full_data=False):
    """Baseline model: a small 1-D convolutional network."""
    train_data = np.load(fp.train)
    test_data_y = np.load(fp.test_no_fix)[:, 60:90]
    test_data_x = np.load(fp.test)[:, 0:60]
    june = None
    source_data = None
    if full_data:
        whole = np.load(fp.fix_data)
        whole = whole[:, :, 6:9, :]  # keep hours 6, 7 and 8 only
        train_data = whole.reshape(132 * 92, 90)
        june = np.load(fp.fix_June)
        source_data = june.reshape(132 * 30, 60)

    fp.clear(fp.logdir)

    # n_input, n_output, kernel_size, kernel_size_pool, dev, bias_init,
    # learning_rate, training_iters and batch_size are module-level
    # hyper-parameters.
    x = tf.placeholder(tf.float32, [None, n_input])
    y_ = tf.placeholder(tf.float32, [None, n_output])
    is_training = tf.placeholder(tf.bool)
    keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

    # Create some wrappers for simplicity
    def conv1d(x, W, b):
        # 1-D convolution expressed as conv2d, with bias and relu activation
        x = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
        x = tf.nn.bias_add(x, b)
        return tf.nn.relu(x)

    def maxpool2d(x, k=kernel_size_pool):
        # MaxPool wrapper: pools along the time axis with stride 2
        return tf.nn.max_pool(x,
                              ksize=[1, k, 1, 1],
                              strides=[1, 2, 1, 1],
                              padding='SAME')

    # Create model
    def conv_net(x, weights, biases, dropout):
        # Reshape the flat length-60 input into a (60, 1, 1) "image"
        x = tf.reshape(x, shape=[-1, 60, 1, 1])

        # Convolution layer + max pooling (down-sampling): 60 -> 30
        conv1 = conv1d(x, weights['wc1'], biases['bc1'])
        conv1 = maxpool2d(conv1)

        # Convolution layer + max pooling (down-sampling): 30 -> 15
        conv2 = conv1d(conv1, weights['wc2'], biases['bc2'])
        conv2 = maxpool2d(conv2)

        # Fully connected layer: flatten conv2 output to fit its input
        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        bn = tf.contrib.layers.batch_norm(fc1,
                                          decay=0.9,
                                          center=True,
                                          scale=True,
                                          updates_collections=None,
                                          is_training=is_training,
                                          reuse=None,
                                          trainable=True,
                                          scope="bn")
        fc1 = tf.nn.relu(bn)
        # Apply dropout
        fc1 = tf.nn.dropout(fc1, dropout)

        # Output layer
        out = tf.nn.relu(tf.add(tf.matmul(fc1, weights['out']), biases['out']))
        return out

    # Store layers weight & bias
    weights = {
        # kernel_size x 1 conv, 1 input channel, 10 output channels
        'wc1': tf.Variable(
            tf.truncated_normal([kernel_size, 1, 1, 10], stddev=dev)),
        # kernel_size x 1 conv, 10 input channels, 15 output channels
        'wc2': tf.Variable(
            tf.truncated_normal([kernel_size, 1, 10, 15], stddev=dev)),
        # fully connected: after two stride-2 pools the length-60 input is 15,
        # with 15 channels, so 15 * 15 inputs and 200 outputs
        'wd1': tf.Variable(tf.truncated_normal([15 * 15, 200], stddev=dev)),
        # 200 inputs, n_output outputs
        'out': tf.Variable(tf.truncated_normal([200, n_output], stddev=dev))
    }
    biases = {
        'bc1': tf.Variable(tf.constant(bias_init, shape=[10])),
        'bc2': tf.Variable(tf.constant(bias_init, shape=[15])),
        'bd1': tf.Variable(tf.constant(bias_init, shape=[200])),
        'out': tf.Variable(tf.constant(bias_init, shape=[n_output]))
    }

    # Construct model
    y = conv_net(x, weights, biases, keep_prob)

    # Mask out entries whose ground truth is zero (missing values) so they do
    # not contribute to the loss
    zeros = tf.cast(tf.zeros_like(y_), dtype=tf.bool)
    ones = tf.cast(tf.ones_like(y_), dtype=tf.bool)
    loc = tf.where(tf.equal(y_, 0), zeros, ones)
    rel_y = tf.boolean_mask(y_, loc)
    pred_y = tf.boolean_mask(y, loc)

    diff = tf.abs(tf.subtract(rel_y, pred_y))
    ratio = tf.divide(diff, rel_y)
    cost = tf.reduce_sum(ratio)
    summary = tf.summary.scalar("mean", cost)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph
    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        sess.run(init)
        train_writer = tf.summary.FileWriter(fp.logdir, sess.graph)
        for i in range(training_iters):
            if i % 50 == 0:
                summary, res = sess.run(
                    [merged, cost],
                    feed_dict={
                        x: test_data_x,
                        y_: test_data_y,
                        keep_prob: 1,
                        is_training: False
                    })
                train_writer.add_summary(summary, i)
                print("{0} : {1}".format(i, res / 71280))
            np.random.shuffle(train_data)
            train_sample = train_data[0:batch_size]
            train_x = train_sample[:, 0:60]
            train_y = train_sample[:, 60:90]
            sess.run(optimizer,
                     feed_dict={
                         x: train_x,
                         y_: train_y,
                         keep_prob: 0.6,
                         is_training: True
                     })
        if full_data:
            res_june = sess.run(y,
                                feed_dict={
                                    x: source_data,
                                    keep_prob: 1.0,
                                    is_training: False
                                })
            res_june = res_june.reshape(132, 30, 30)
            full.write_to_file(res_june)
        train_writer.close()
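# The loss in naive_conv() is the same percentage-error sum as in navie_full(),
# except that entries whose ground truth is 0 (missing values) are masked out
# before the ratio is computed. A minimal numpy sketch (not part of the
# original module) of that masked metric:
def masked_mape_sum(y_true, y_pred):
    """Sum of |y_true - y_pred| / y_true over entries where y_true != 0."""
    mask = y_true != 0
    return np.sum(np.abs(y_true[mask] - y_pred[mask]) / y_true[mask])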
def take_images(self):
    if self.noop or self.skip - test_seq_num < self.scanmode + self.transparent:
        # Read the current sequencer parameters so they can be reported
        preCols = fp.fp.getSequencerParameter("PreCols")
        readCols = fp.fp.getSequencerParameter("ReadCols")
        postCols = fp.fp.getSequencerParameter("PostCols")
        overCols = fp.fp.getSequencerParameter("OverCols")
        preRows = fp.fp.getSequencerParameter("PreRows")
        readRows = fp.fp.getSequencerParameter("ReadRows")
        postRows = fp.fp.getSequencerParameter("PostRows")
        scanMode = fp.fp.isScanEnabled()
        idleFlushTimeout = fp.fp.getConfigurationParameterValue(
            "sequencerConfig", "idleFlushTimeout")
        print "Initial sequencer parameters"
        print "preCols=", preCols
        print "readCols=", readCols
        print "postCols=", postCols
        print "overCols=", overCols
        print "preRows=", preRows
        print "readRows=", readRows
        print "postRows=", postRows
        print "scanMode=", scanMode
        print "idleFlushTimeout=", idleFlushTimeout

        # set up scan mode
        fp.fp.sequencerConfig().submitChanges({
            "underCols": self.undercols,
            "preCols": self.precols,
            "readCols": self.readcols,
            "postCols": self.postcols,
            "overCols": self.overcols,
            "preRows": self.prerows,
            "readRows": self.readrows,
            "postRows": self.postrows,
            "overRows": self.overrows,
            "scanMode": True,
            "idleFlushTimeout": -1
        })
        fp.fp.applySubmittedChanges()
        if idleFlushTimeout != -1:
            fp.clear()

        exposure = 1.0
        expose_command = lambda: time.sleep(exposure)
        for i in range(self.scanmode):
            self.take_image(exposure,
                            expose_command,
                            image_type=None,
                            symlink_image_type=None)

    if self.noop or self.skip - test_seq_num < self.transparent:
        fp.fp.sequencerConfig().submitChanges({"transparentMode": 1})
        timeout = Duration.ofSeconds(60 * 5)
        fp.fp.applySubmittedChanges(timeout=timeout)
        for i in range(self.transparent):
            self.take_image(exposure,
                            expose_command,
                            image_type=None,
                            symlink_image_type=None)

    # Restore settings
    fp.fp.dropAllChanges()
    if idleFlushTimeout != -1:
        fp.clear()