import os
import sys

import numpy as np
import progressbar
import tensorflow as tf

# Project helpers (preparing_data, separate_set, img_help, params_conv_layers,
# params_fc_layers, init_conv_layers, init_fc_layers, batch_sum, compute_f1_tf,
# logger, ...) are assumed to be imported elsewhere in this module.


def learn(sat_images, label_images, configuration, SEED, FLAGS,
          save_model_filepath):
    # Getting the data on which we are going to train
    data, labels = preparing_data(sat_images, label_images,
                                  configuration.ROTATE_IMAGES,
                                  configuration.ROTATED_IMG,
                                  configuration.IMG_PATCH_SIZE,
                                  configuration.IMG_BORDER)

    # Separating our data into three distinct sets (training, validation,
    # testing) and normalizing it
    (train_set, valid_set, test_set, means, stds) = separate_set(
        data, labels, configuration.VALIDATION_TRAIN_PERC,
        configuration.VALIDATION_VAL_PERC)

    # Balancing data
    train_set = img_help.balance_data(train_set[0], train_set[1])

    print("******************************************************************************")
    print("\nWe will train on", len(train_set[0]), "patches of size",
          str(configuration.IMG_TOTAL_SIZE) + "x" + str(configuration.IMG_TOTAL_SIZE))
    print("\nInitializing tensorflow graphs for training and validating")

    num_epochs = configuration.NUM_EPOCHS

    # Initialization of placeholders for data and labels
    train_data_node = tf.placeholder(
        tf.float32,
        shape=(configuration.BATCH_SIZE, configuration.IMG_TOTAL_SIZE,
               configuration.IMG_TOTAL_SIZE, configuration.NUM_CHANNELS))
    train_label_node = tf.placeholder(
        tf.float32,
        shape=(configuration.BATCH_SIZE, configuration.NUM_LABELS))
    eval_data_node = tf.placeholder(
        tf.float32,
        shape=(None, configuration.IMG_TOTAL_SIZE, configuration.IMG_TOTAL_SIZE,
               configuration.NUM_CHANNELS))
    eval_label_node = tf.placeholder(
        tf.float32, shape=(None, configuration.NUM_LABELS))

    # Define the parameters of the convolutional layers
    conv_params, last_depth = params_conv_layers(configuration.CONV_ARCH,
                                                 configuration.CONV_DEPTH,
                                                 configuration.NUM_CHANNELS,
                                                 SEED)

    # Each convolutional block halves the spatial resolution, so the input
    # size must be divisible by 2**(number of blocks).
    pool_fact = 2**len(configuration.CONV_ARCH)

    if configuration.IMG_TOTAL_SIZE % pool_fact != 0:
        raise ValueError("IMG_TOTAL_SIZE is not divisible by the pooling "
                         "factor: " + str(configuration.IMG_TOTAL_SIZE) +
                         " / " + str(pool_fact))

    size = int(configuration.IMG_TOTAL_SIZE / pool_fact *
               configuration.IMG_TOTAL_SIZE / pool_fact * last_depth)

    fc_params = params_fc_layers(configuration.FC_ARCH, configuration.FC_DEPTH,
                                 size, configuration.NUM_LABELS, SEED)

    # Definition of the complete cnn model.
    def model(data, train=False):
        # Convolution layers
        conv_end = init_conv_layers(configuration.CONV_ARCH, conv_params, data)

        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        conv_end_shape = conv_end.get_shape().as_list()
        reshape = tf.reshape(
            conv_end,
            [-1, conv_end_shape[1] * conv_end_shape[2] * conv_end_shape[3]])

        out = init_fc_layers(configuration.FC_ARCH, fc_params, reshape, train,
                             configuration.KEEP_DROPOUT, SEED)

        return out

    logits = model(train_data_node, True)

    # Computes the probability error for each prediction
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=train_label_node,
                                                logits=logits))
    tf.summary.scalar('loss', loss)

    # L2 regularization for the fully connected parameters.
    regularizers = tf.nn.l2_loss(fc_params[0][0]) + tf.nn.l2_loss(fc_params[0][1])
    for params in fc_params[1:]:
        regularizers += tf.nn.l2_loss(params[0])
        regularizers += tf.nn.l2_loss(params[1])
    # Add the regularization term to the loss.
    loss += 5e-4 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Use the Adam optimizer as it adapts the learning rate automatically.
    adam_opt = tf.train.AdamOptimizer(configuration.LEARNING_RATE)
    optimizer = adam_opt.minimize(loss, global_step=batch)

    # Predictions for the minibatch, validation set and test set.
    train_prediction_graph = tf.nn.softmax(logits)
    # Compute predictions for the training minibatch
    correct_predictions_train_graph = tf.equal(
        tf.argmax(train_prediction_graph, 1), tf.argmax(train_label_node, 1))

    # Accuracy for training
    accuracy_train_graph = tf.reduce_mean(
        tf.cast(correct_predictions_train_graph, tf.float32))

    # Validation / testing set predictions
    eval_predictions_graph = tf.nn.softmax(model(eval_data_node))
    # Compute predictions for validation and test
    eval_correct_predictions_graph = tf.equal(
        tf.argmax(eval_predictions_graph, 1), tf.argmax(eval_label_node, 1))

    # Accuracy computation
    eval_accuracy_graph = tf.reduce_mean(
        tf.cast(eval_correct_predictions_graph, tf.float32))

    # Will be used later when we need to compute the F1 score
    threshold_tf = tf.Variable(0, name="threshold_tf", dtype=tf.float32)

    # Index [0] corresponds to a road, which we consider as positive, therefore 1.
    pos_predictions_thresh_graph = tf.cast(
        tf.transpose(eval_predictions_graph)[0] > threshold_tf, tf.int64)
    # For the true labels the encoding is the opposite (1 is background,
    # road is 0), so we use argmin to reverse it.
    true_predictions_graph = tf.argmin(eval_label_node, 1)
    # Boolean array with true values wherever the prediction matches the label.
    correct_predictions_thresh = tf.equal(pos_predictions_thresh_graph,
                                          true_predictions_graph)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Operation that initialises all the variables
    init = tf.global_variables_initializer()

    s = tf.Session()

    if configuration.RESTORE_MODEL:
        # Restore variables from disk.
        if not os.path.exists(save_model_filepath + ".index"):
            raise ValueError("model not found : " + save_model_filepath)
        saver.restore(s, save_model_filepath)
        print("Model restored from :", save_model_filepath)
    else:
        # Run the initialisation of the variables
        s.run(init)
        print('\nInitialized!')

        train_size = len(train_set[0])

        # Loop through training steps.
        print('\nTotal number of epochs for training :', num_epochs)
        print('Total number of steps for epoch :',
              int(train_size / configuration.BATCH_SIZE))
        print('Total number of steps :',
              num_epochs * int(train_size / configuration.BATCH_SIZE))
        print("\n")

        print("******************************************************************************")
        print(" Training")
        print("******************************************************************************")

        try:
            batch_size = configuration.BATCH_SIZE
            for epoch in range(num_epochs):
                print("\n******************************************************************************")
                print("training for epoch :", epoch + 1, "out of", num_epochs,
                      "epochs")

                perm_idx = np.random.permutation(train_size)

                batch_bar = progressbar.ProgressBar(
                    max_value=int(train_size / configuration.BATCH_SIZE))

                for step in range(int(train_size / configuration.BATCH_SIZE)):
                    batch_idx = perm_idx[step * batch_size:(step + 1) * batch_size]

                    # Compute the offset of the current minibatch in the data.
                    # Note that we could use better randomization across epochs.
                    batch_data = train_set[0][batch_idx]
                    batch_labels = train_set[1][batch_idx]

                    # This dictionary maps the batch data (as a numpy array) to
                    # the node in the graph it should be fed to.
                    feed_dict = {
                        train_data_node: batch_data,
                        train_label_node: batch_labels
                    }

                    if step % configuration.RECORDING_STEP == 0:
                        _, l = s.run([optimizer, loss], feed_dict=feed_dict)

                        print("\ncomputing intermediate accuracy and loss at step",
                              step)

                        print("computing train accuracy")
                        acc = batch_sum(s, eval_accuracy_graph, train_set,
                                        configuration.EVAL_BATCH_SIZE,
                                        eval_data_node, eval_label_node)
                        train_acc = acc / int(
                            np.ceil(len(train_set[0]) / configuration.EVAL_BATCH_SIZE))
                        logger.append_log("Accuracy_training", train_acc)

                        print("computing validation accuracy")
                        acc = batch_sum(s, eval_accuracy_graph, valid_set,
                                        configuration.EVAL_BATCH_SIZE,
                                        eval_data_node, eval_label_node)
                        valid_acc = acc / int(
                            np.ceil(len(valid_set[0]) / configuration.EVAL_BATCH_SIZE))
                        logger.append_log("Accuracy_validation", valid_acc)

                        logger.append_log("Loss_training", l)

                        print('\n%.2f' % (float(step) * configuration.BATCH_SIZE /
                                          train_size) + '% of Epoch ' + str(epoch + 1))
                        print("loss :", l)
                        print("training set accuracy :", train_acc)
                        print("validation set accuracy :", valid_acc)

                        saver.save(s, FLAGS.train_dir + "/model.ckpt")

                        print("\nContinuing training steps")
                        sys.stdout.flush()
                    else:
                        # Run the graph and fetch some of the nodes.
                        batch_bar.update(step)
                        _, l, predictions_train = s.run(
                            [optimizer, loss, train_prediction_graph],
                            feed_dict=feed_dict)

                batch_bar.finish()
                # Nothing more to do here: the permutation is redrawn at the
                # beginning of each epoch.
        except KeyboardInterrupt:
            print("Interrupted at epoch ", epoch + 1)
            print("Restoring model from last evaluation")
            saver.restore(s, FLAGS.train_dir + "/model.ckpt")

        logger.set_log("Epoch_stop", epoch + 1)

    print("\n******************************************************************************")
    print("Finished training")

    print("\nScoring on validation set")
    acc = batch_sum(s, eval_accuracy_graph, valid_set,
                    configuration.EVAL_BATCH_SIZE, eval_data_node,
                    eval_label_node)
    accuracy = acc / int(
        np.ceil(len(valid_set[0]) / configuration.EVAL_BATCH_SIZE))
    logger.append_log("Accuracy_validation", accuracy)
    print("Accuracy rating is :", accuracy)

    print("\nScoring on testing set")
    acc = batch_sum(s, eval_accuracy_graph, test_set,
                    configuration.EVAL_BATCH_SIZE, eval_data_node,
                    eval_label_node)
    accuracy = acc / int(
        np.ceil(len(test_set[0]) / configuration.EVAL_BATCH_SIZE))
    logger.set_log("Accuracy_test", accuracy)
    print("Accuracy rating is :", accuracy)

    # Base threshold in case we don't want to fine-tune it.
    max_thresh = 0.5
    if configuration.OPTI_F1:
        print("\n******************************************************************************")
        print("Finding best f1_score with different thresholds")

        # Computing the F1 score from predictions with different thresholds
        thresh_start = 0
        thresh_end = 1
        thresh_steps = 10
        theta_thresh = configuration.THETA_THRESH
        diff_thresh = 1

        while diff_thresh > theta_thresh:
            print("\nTesting for threshold between", thresh_start, "and",
                  thresh_end)
            threshs = np.linspace(thresh_start, thresh_end, thresh_steps)

            f1_scores = []
            for thresh in threshs:
                s.run(threshold_tf.assign(thresh))

                print("\nComputing F1-score with threshold :", thresh)
                f1_score = compute_f1_tf(s, pos_predictions_thresh_graph,
                                         correct_predictions_thresh, valid_set,
                                         configuration.EVAL_BATCH_SIZE,
                                         eval_data_node, eval_label_node)
                f1_scores.append(f1_score)
                print("F1-score :", f1_score)

            # Log the validation scores and narrow the search interval
            # around the best threshold
            logger.append_log("F1-score_validation", f1_scores)
            logger.append_log("F1-score_threshs_validation", list(threshs))

            idx_max_thresh = np.argmax(f1_scores)
            diff_thresh = f1_scores[idx_max_thresh] - f1_scores[0]

            thresh_start = threshs[max(idx_max_thresh - 1, 0)]
            thresh_end = threshs[min(idx_max_thresh + 1, thresh_steps - 1)]

            print("\nDifference :", diff_thresh)

        max_thresh = threshs[idx_max_thresh]
        print("Best threshold found with confidence", theta_thresh, ":",
              max_thresh)

    # Test set F1-score with the selected threshold
    s.run(threshold_tf.assign(max_thresh))

    print("\nTest set F1-score with best threshold :", max_thresh)
    f1_score = compute_f1_tf(s, pos_predictions_thresh_graph,
                             correct_predictions_thresh, test_set,
                             configuration.EVAL_BATCH_SIZE, eval_data_node,
                             eval_label_node)
    logger.set_log("F1-score_test", f1_score)
    print("F1-score:", f1_score)

    if not configuration.RESTORE_MODEL:
        print("\nSaving our model")
        saver.save(s, save_model_filepath)

    logger.save_log()

    return s, model, means, stds, max_thresh
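
# The threshold search in learn() above repeatedly narrows an np.linspace
# interval around the threshold with the best validation F1-score until the
# improvement over the interval's left edge drops below THETA_THRESH. The
# sketch below is a minimal, self-contained NumPy version of the same idea,
# kept separate from the TensorFlow graph; road_probs / road_labels and both
# function names are hypothetical illustrations, not part of this project.
import numpy as np


def f1_at_threshold(road_probs, road_labels, thresh):
    # Road (label 1) is treated as the positive class, as in the graph above.
    preds = (road_probs > thresh).astype(np.int64)
    tp = np.sum((preds == 1) & (road_labels == 1))
    fp = np.sum((preds == 1) & (road_labels == 0))
    fn = np.sum((preds == 0) & (road_labels == 1))
    if tp == 0:
        return 0.0
    precision = tp / float(tp + fp)
    recall = tp / float(tp + fn)
    return 2 * precision * recall / (precision + recall)


def refine_threshold(road_probs, road_labels, theta_thresh=1e-3, steps=10):
    # Zoom the [start, end] interval in on the best-scoring threshold.
    start, end, diff = 0.0, 1.0, 1.0
    while diff > theta_thresh:
        threshs = np.linspace(start, end, steps)
        f1_scores = [f1_at_threshold(road_probs, road_labels, t) for t in threshs]
        idx = int(np.argmax(f1_scores))
        diff = f1_scores[idx] - f1_scores[0]
        start = threshs[max(idx - 1, 0)]
        end = threshs[min(idx + 1, steps - 1)]
    return threshs[idx]
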
def start_ekimu_check(ekimu_site_url, ekimu_site_name, logger, connection, cursor):
    # Check that the URL exists
    # site_url = u'http://wave.pref.wakayama.lg.jp/ekimu2/'
    try:
        f = urllib2.urlopen(ekimu_site_url)
        logger.set_log(ekimu_site_url)
        f.close()
    except urllib2.HTTPError:
        logger.set_log("NotFound:" + ekimu_site_url)
        return False

    # Processing start time
    process_start = datetime.datetime.now()

    # Number of tenders
    count_tenders = 0

    # Get the registration number
    sql = u'select max(registration_no) from t_tenders;'
    cursor.execute(sql)
    record = cursor.fetchone()
    max_registration_no = record[0]
    if max_registration_no is None:
        max_registration_no = 0
    registration_no = max_registration_no + 1
    # print(registration_no)

    # Parameters (list) used to fetch the tender information
    params = []

    # Currently published => open competitive bidding
    param = MainParam()
    param.keishu_cd = u'1'
    param.public_flag = u'0'
    params.append(param)

    # Currently published => simplified open procurement
    param = MainParam()
    param.keishu_cd = u'2'
    param.public_flag = u'0'
    params.append(param)

    # Already closed => open competitive bidding
    param = MainParam()
    param.keishu_cd = u'1'
    param.public_flag = u'1'
    params.append(param)

    # Already closed => simplified open procurement
    param = MainParam()
    param.keishu_cd = u'2'
    param.public_flag = u'1'
    params.append(param)

    for param in params:
        # Fetch the HTML
        # from html_anken_page.py
        html_page = html_anken_page.HtmlAnkenPage()
        logger.set_log('class HtmlAnkenPage')
        html_page.set_keishu_cd(param.keishu_cd)
        html_page.set_public_flag(param.public_flag)
        html_page.get_html(ekimu_site_url)

        # Get the list of tender-information page URLs from the HTML
        html_page.get_page_list(ekimu_site_url)

        # Get the list of tenders from the tender-information page URL list
        for page in html_page.page_list:
            logger.set_log(page)
            html_list = html_anken_list.HtmlAnkenList(page)
            html_list.get_anken_list(ekimu_site_url)

            for url in html_list.anken_url_list:
                logger.set_log(url)
                count_tenders = count_tenders + 1

                html_disp = html_anken_disp.HtmlAnkenDisp()
                html_disp.set_url(url, ekimu_site_url)

                # Fetch the tender details
                html_disp.get_anken()

                # Update table: t_tenders
                t_tenders = dao_t_tenders.DaoTTenders()
                t_tenders.make_sql_exist()
                sql_params = []
                sql_params.append(html_disp.anken.anken_no)
                cursor = t_tenders.exec_sql_params(connection, cursor, sql_params)

                if 0 == cursor.rowcount:
                    # INSERT t_tenders
                    t_tenders.make_sql_select_max_id()
                    cursor = t_tenders.exec_sql(connection, cursor)
                    record = cursor.fetchone()
                    max_id = record[0]
                    if max_id is None:
                        max_id = 0

                    t_tenders.make_sql_insert()
                    sql_params = []
                    id = max_id + 1
                    sql_params.append(id)
                    sql_params.append(html_disp.anken.nyusatsu_system)
                    sql_params.append(html_disp.anken.nyusatsu_type)
                    sql_params.append(html_disp.anken.anken_no)
                    sql_params.append(html_disp.anken.anken_url)
                    sql_params.append(html_disp.anken.anken_name)
                    sql_params.append(html_disp.anken.keishu_cd)
                    sql_params.append(html_disp.anken.keishu_name)
                    sql_params.append(html_disp.anken.public_flag)
                    sql_params.append(html_disp.anken.company_area)
                    sql_params.append(html_disp.anken.anken_open_date)
                    sql_params.append(html_disp.anken.anken_close_date)
                    sql_params.append(html_disp.anken.tender_date)
                    sql_params.append(html_disp.anken.tender_place)
                    sql_params.append(html_disp.anken.limit_date)
                    sql_params.append(html_disp.anken.gyoumu_kbn_1)
                    sql_params.append(html_disp.anken.gyoumu_kbn_2)
                    sql_params.append(html_disp.anken.kasitu_name)
                    sql_params.append(html_disp.anken.tanto_name)
                    sql_params.append(html_disp.anken.notes)
                    sql_params.append(html_disp.anken.result_open_date)
                    sql_params.append(html_disp.anken.result_close_date)
                    sql_params.append(html_disp.anken.raku_name)
                    sql_params.append(html_disp.anken.price)
                    sql_params.append(registration_no)
                    sql_params.append(ekimu_site_name)
                    sql_params.append(html_disp.anken.attached_file_1)
                    sql_params.append(html_disp.anken.attached_file_2)
                    sql_params.append(html_disp.anken.attached_file_3)

                    t_tenders.exec_sql_params(connection, cursor, sql_params)
                    # print "insert"
                else:
                    # UPDATE t_tenders
                    t_tenders.make_sql_update()

                    sql_params = []
                    sql_params.append(html_disp.anken.nyusatsu_system)
                    sql_params.append(html_disp.anken.nyusatsu_type)
                    sql_params.append(html_disp.anken.anken_url)
                    sql_params.append(html_disp.anken.anken_name)
                    sql_params.append(html_disp.anken.keishu_cd)
                    sql_params.append(html_disp.anken.keishu_name)
                    sql_params.append(html_disp.anken.public_flag)
                    sql_params.append(html_disp.anken.company_area)
                    sql_params.append(html_disp.anken.anken_open_date)
                    sql_params.append(html_disp.anken.anken_close_date)
                    sql_params.append(html_disp.anken.tender_date)
                    sql_params.append(html_disp.anken.tender_place)
                    sql_params.append(html_disp.anken.limit_date)
                    sql_params.append(html_disp.anken.gyoumu_kbn_1)
                    sql_params.append(html_disp.anken.gyoumu_kbn_2)
                    sql_params.append(html_disp.anken.kasitu_name)
                    sql_params.append(html_disp.anken.tanto_name)
                    sql_params.append(html_disp.anken.notes)
                    sql_params.append(html_disp.anken.result_open_date)
                    sql_params.append(html_disp.anken.result_close_date)
                    sql_params.append(html_disp.anken.raku_name)
                    sql_params.append(html_disp.anken.price)
                    sql_params.append(registration_no)
                    sql_params.append(ekimu_site_name)
                    sql_params.append(html_disp.anken.attached_file_1)
                    sql_params.append(html_disp.anken.attached_file_2)
                    sql_params.append(html_disp.anken.attached_file_3)
                    sql_params.append(html_disp.anken.anken_no)

                    t_tenders.exec_sql_params(connection, cursor, sql_params)
                    # print util.clean_string(t_tenders.get_sql())
                    # print cursor.query.decode('utf-8')
                    # logger.set_log(cursor.query.decode('utf-8'))
                    # logger.set_log(util.clean_string(cursor.query.encode('utf-8')))
                    # print cursor.query
                    # print "update"

                # Update table: j_nyusatsu
                # The table has grown too large,
                # so this step is probably no longer needed.
                # j_nyusatsu = dao_j_nyusatsu.DaoJNyusatsu()
                # j_nyusatsu.make_sql_select_max_id()
                # cursor = j_nyusatsu.exec_sql(connection, cursor)
                # record = cursor.fetchone()
                # max_id = record[0]
                # if max_id == None:
                #     max_id = 0
                # # print max_id
                #
                # j_nyusatsu.make_sql_insert()
                #
                # sql_params = []
                # id = max_id + 1
                #
                # # id,
                # sql_params.append(id)
                # # nyusatsu_system,
                # sql_params.append(html_disp.anken.nyusatsu_system)
                # # nyusatsu_type,
                # sql_params.append(html_disp.anken.nyusatsu_type)
                # # anken_no,
                # sql_params.append(html_disp.anken.anken_no)
                # # anken_url,
                # sql_params.append(html_disp.anken.anken_url)
                # # anken_name,
                # sql_params.append(html_disp.anken.anken_name)
                # # keishu_cd,
                # sql_params.append(html_disp.anken.keishu_cd)
                # # keishu_name,
                # sql_params.append(html_disp.anken.keishu_name)
                # # public_flag,
                # sql_params.append(html_disp.anken.public_flag)
                # # company_area,
                # sql_params.append(html_disp.anken.company_area)
                # # anken_open_date,
                # sql_params.append(html_disp.anken.anken_open_date)
                # # anken_close_date,
                # sql_params.append(html_disp.anken.anken_close_date)
                # # tender_date,
                # sql_params.append(html_disp.anken.tender_date)
                # # tender_place,
                # sql_params.append(html_disp.anken.tender_place)
                # # limit_date,
                # sql_params.append(html_disp.anken.limit_date)
                # # gyoumu_kbn_1,
                # sql_params.append(html_disp.anken.gyoumu_kbn_1)
                # # gyoumu_kbn_2,
                # sql_params.append(html_disp.anken.gyoumu_kbn_2)
                # # kasitu_name,
                # sql_params.append(html_disp.anken.kasitu_name)
                # # tanto_name,
                # sql_params.append(html_disp.anken.tanto_name)
                # # notes,
                # sql_params.append(html_disp.anken.notes)
                # # result_open_date,
                # sql_params.append(html_disp.anken.result_open_date)
                # # result_close_date,
                # sql_params.append(html_disp.anken.result_close_date)
                # # raku_name,
                # sql_params.append(html_disp.anken.raku_name)
                # # price,
                # sql_params.append(html_disp.anken.price)
                # # Registration number
                # sql_params.append(registration_no)
                # # Site name
                # sql_params.append(config.SITE_NAME)
                #
                # j_nyusatsu.exec_sql_params(connection, cursor, sql_params)

    # Processing end time
    process_end = datetime.datetime.now()

    # Processing time (seconds)
    process_seconds = (process_end - process_start).seconds

    # Update table: j_histories
    j_histories = dao_j_histories.DaoJHistories()
    j_histories.make_sql_insert()

    sql_params = []
    sql_params.append(process_start)
    sql_params.append(process_end)
    sql_params.append(process_seconds)
    sql_params.append(count_tenders)

    j_histories.exec_sql_params(connection, cursor, sql_params)
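
# Note on the elapsed-time computation above: timedelta.seconds only holds the
# seconds component of the difference (0-86399) and silently drops whole days,
# whereas timedelta.total_seconds() returns the full duration. A minimal,
# standard-library-only illustration (the variable names are hypothetical):
import datetime

start = datetime.datetime(2013, 3, 1, 23, 0, 0)
end = datetime.datetime(2013, 3, 3, 1, 30, 0)
delta = end - start
seconds_component = delta.seconds       # 9000: only the last partial day
full_duration = delta.total_seconds()   # 95400.0: whole days are included
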
# Create the pg_connection
import dao_pg_connection

pg_connection = dao_pg_connection.PgConnection()
pg_connection.set_pg_connection_open(logger)
connection = pg_connection.get_pg_connection()
cursor = connection.cursor()

# Create an instance
dao_t_nyusatsu = DaoTNyusatsu()

# Build the SQL that initializes the table
dao_t_nyusatsu.make_sql_delete()
# Write the SQL to the log
logger.set_log(dao_t_nyusatsu.get_sql())
# Execute the SQL
cursor = dao_t_nyusatsu.exec_sql(connection, cursor)

# Build the SQL that fetches the max id
dao_t_nyusatsu.make_sql_select_max_id()
# Write the SQL to the log
logger.set_log(dao_t_nyusatsu.get_sql())
# Execute the SQL
cursor = dao_t_nyusatsu.exec_sql(connection, cursor)

record = cursor.fetchone()
max_id = record[0]
if max_id is None:
    max_id = 0
if not exist_log_dir:
    os.mkdir(log_dir_path)
    os.chmod(log_dir_path, 0777)

log_file_path = log_dir_path + "access_log_" + datetime.datetime.now().strftime("%Y-%m-%d") + ".log"

# Check whether the log file exists
exist_log_file = os.path.exists(log_file_path)
if not exist_log_file:
    # Create it if it does not exist
    f = open(log_file_path, 'w')
    # Change the permissions
    os.chmod(log_file_path, 0777)
    f.close()

logger = logger.Logger(log_file_path)
logger.set_log(u'start')
logger.set_log(u'DB HOST : ' + config.DB_HOST)
logger.set_log(u'DB NAME : ' + config.DB_NAME)
logger.set_log(u'SITE URL : ' + config.SITE_URL)
logger.set_log(u'LOG DIR : ' + config.LOG_FILE_DIR_PATH)

# ### Tables and their details
#
# j_histories
#     Update history.
#
# j_nyusatsu
#     Journal of service-procurement tender records.
#     Every time the cron job runs, a new record is added for each tender
#     currently displayed; the registration number is shared by all tenders
#     registered in a single run.
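
# The comments above describe j_nyusatsu as an append-only journal keyed by a
# per-run registration number. A minimal sketch of how that journal could be
# queried (assuming a psycopg2-style cursor and the column names suggested by
# the comments; fetch_latest_run is a hypothetical helper, not part of this
# code base):
def fetch_latest_run(cursor):
    # Find the most recent registration run, then return its tenders.
    cursor.execute(u'select max(registration_no) from j_nyusatsu;')
    latest_no = cursor.fetchone()[0]
    cursor.execute(
        u'select anken_no, anken_name from j_nyusatsu '
        u'where registration_no = %s;', (latest_no,))
    return cursor.fetchall()
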
import dao_pg_connection

pg_connection = dao_pg_connection.PgConnection()
pg_connection.set_pg_connection_open(logger)
connection = pg_connection.get_pg_connection()
cursor = connection.cursor()

# Tender number used for testing
anken_no = "560700-H2503081217-21"

sql_params = []

# Create an instance
dao_t_tenders = DaoTTenders()

# Check whether the record exists
dao_t_tenders.make_sql_exist()
logger.set_log(dao_t_tenders.get_sql())
sql_params.append(anken_no)
cursor = dao_t_tenders.exec_sql_params(connection, cursor, sql_params)
logger.set_log(str(cursor.rowcount))

if 0 == cursor.rowcount:
    # Create a new record

    # Build the SQL that fetches the max id
    dao_t_tenders.make_sql_select_max_id()
    # Write the SQL to the log
    logger.set_log(dao_t_tenders.get_sql())