def main():
    """Train the numerical/numerical model and pickle it to nnmodel.pkl.

    Reads a primary training set (argv[1], default 'train') plus any number
    of additional sets (argv[2:]), extracts features with MODEL, keeps only
    rows where both 'A type' and 'B type' are 'Numerical', then fits the
    model on the combined data.
    """
    set1 = 'train' if len(sys.argv) < 2 else sys.argv[1]
    set2 = [] if len(sys.argv) < 3 else sys.argv[2:]

    model = MODEL(**MODEL_PARAMS)

    print("Reading in training data " + set1)
    train = data_io.read_data(set1)
    print("Extracting features")
    train = model.extract(train)
    print("Saving train features")
    data_io.write_data(set1, train)
    target = data_io.read_target(set1)

    # Accumulate the optional extra training sets.
    train2 = None
    target2 = None
    for s in set2:
        print("Reading in training data", s)
        tr = data_io.read_data(s)
        print("Extracting features")
        tr = model.extract(tr)
        print("Saving train features")
        data_io.write_data(s, tr)
        tg = data_io.read_target(s)
        train2 = tr if train2 is None else pd.concat((train2, tr),
                                                     ignore_index=True)
        target2 = tg if target2 is None else pd.concat((target2, tg),
                                                       ignore_index=True)

    # Permute and filter the extra data once, after accumulation
    # (not once per input set).
    train_filter2 = None
    if train2 is not None:
        train2, target2 = util.random_permutation(train2, target2)
        train_filter2 = ((train2['A type'] == 'Numerical') &
                         (train2['B type'] == 'Numerical'))

    # Data selection: keep only pairs where both variables are numerical.
    train, target = util.random_permutation(train, target)
    train_filter = ((train['A type'] == 'Numerical') &
                    (train['B type'] == 'Numerical'))
    train = train[train_filter]
    target = target[train_filter]
    if train_filter2 is not None:
        train2 = train2[train_filter2]
        target2 = target2[train_filter2]

    print("Training model with optimal weights")
    X = train if train2 is None else pd.concat([train, train2])
    y = (target.Target.values if target2 is None
         else np.concatenate((target.Target.values, target2.Target.values)))
    model.fit(X, y)

    model_path = "nnmodel.pkl"
    print("Saving model", model_path)
    data_io.save_model(model, model_path)
def main():
    """Train the categorical→numerical model and pickle it to cnmodel.pkl.

    Reads a primary training set (argv[1], default 'train') plus any number
    of additional sets (argv[2:]), extracts features with MODEL, keeps only
    rows where 'A type' is not 'Numerical' and 'B type' is 'Numerical',
    then fits the model on the combined data.
    """
    set1 = 'train' if len(sys.argv) < 2 else sys.argv[1]
    set2 = [] if len(sys.argv) < 3 else sys.argv[2:]

    model = MODEL(**MODEL_PARAMS)

    print("Reading in training data " + set1)
    train = data_io.read_data(set1)
    print("Extracting features")
    train = model.extract(train)
    print("Saving train features")
    data_io.write_data(set1, train)
    target = data_io.read_target(set1)

    # Accumulate the optional extra training sets.
    train2 = None
    target2 = None
    for s in set2:
        print("Reading in training data", s)
        tr = data_io.read_data(s)
        print("Extracting features")
        tr = model.extract(tr)
        print("Saving train features")
        data_io.write_data(s, tr)
        tg = data_io.read_target(s)
        train2 = tr if train2 is None else pd.concat((train2, tr),
                                                     ignore_index=True)
        target2 = tg if target2 is None else pd.concat((target2, tg),
                                                       ignore_index=True)

    # Permute and filter the extra data once, after accumulation
    # (not once per input set).
    train_filter2 = None
    if train2 is not None:
        train2, target2 = util.random_permutation(train2, target2)
        train_filter2 = ((train2['A type'] != 'Numerical') &
                         (train2['B type'] == 'Numerical'))

    # Data selection: keep only non-numerical -> numerical pairs.
    train, target = util.random_permutation(train, target)
    train_filter = ((train['A type'] != 'Numerical') &
                    (train['B type'] == 'Numerical'))
    train = train[train_filter]
    target = target[train_filter]
    if train_filter2 is not None:
        train2 = train2[train_filter2]
        target2 = target2[train_filter2]

    print("Training model with optimal weights")
    X = train if train2 is None else pd.concat([train, train2])
    y = (target.Target.values if target2 is None
         else np.concatenate((target.Target.values, target2.Target.values)))
    model.fit(X, y)

    model_path = "cnmodel.pkl"
    print("Saving model", model_path)
    data_io.save_model(model, model_path)
def main():
    """Run the simulation pipeline.

    In debug mode (data.IS_DEBUG) the run is timed and nothing is written;
    otherwise the results are persisted via data_io.write_data().
    """
    if data.IS_DEBUG:
        # Local import: timing is only needed in debug runs.
        # perf_counter() is the monotonic clock intended for intervals.
        import time
        t1 = time.perf_counter()
    data_io.read_data()
    simulations.start()
    # analysis.start()
    # data_io.write_data()
    # theory.start()
    if data.IS_DEBUG:
        t2 = time.perf_counter()
        print(f'Time = {t2 - t1}')
    else:
        data_io.write_data()
def view():
    """Interactive console menu for the book-management system.

    Loads the saved book list (empty list if none exists), then loops on a
    numbered menu: register, list, delete, update, search, look up a
    registration number, or save-and-quit.
    """
    try:
        info_arr = data.read_data("book_list.txt")
    except Exception:  # no saved list (or unreadable file) -> start empty
        print("No data exists")
        info_arr = []
    while True:
        print(
            "========= 도서 관리 system ========= \n1. 신규 도서 등록\n2. 보유 도서 조회\n3. 도서 삭제\n4. 도서 정보 수정\n5. 특정 도서 정보 조회\n6. 도서 등록번호 조회\n7. 종료"
        )
        print("==========================================")
        try:
            menu = int(input("메뉴를 선택하세요: "))
        except ValueError:
            # Non-numeric input: same exit path as an out-of-range number.
            print("no proper number: 시스템 종료")
            exit()
        if menu == 1:
            num = int(input("등록할 도서 갯수: "))
            for _ in range(num):
                info_arr.append(create.register())
        elif menu == 2:
            read.ai_list(info_arr)
        elif menu == 3:
            del_name = input("삭제할 도서 이름: ")
            delete.ai_remove(del_name, info_arr)
        elif menu == 4:
            update_name = input("수정할 도서 이름: ")
            update.ai_update(update_name, info_arr)
        elif menu == 5:
            get_name = input("조회할 도서 이름: ")
            print("해당 도서 정보: ", read.ai_search(get_name, info_arr))
        elif menu == 6:
            get_name = input("조회할 도서 이름: ")
            print("해당 도서 등록 번호: ", read.ai_num(get_name, info_arr))
        elif menu == 7:
            print("시스템 종료")
            data.save_list("book_list.txt", read.ai_list(info_arr))
            break
        else:
            print("no proper number: 시스템 종료")
            exit()
if __name__ == '__main__':
    print(datetime.datetime.now())
    np.random.seed(1500)  # 500 is original
    train_path = 'data/train.tsv'
    submission_path = 'data/test.tsv'
    n = 10000

    # Load cached artifacts. Pickles must be opened in binary mode, and the
    # file handles are closed deterministically via `with`.
    with open('cache/tagged.%s.pkl' % n, 'rb') as f:
        tags = pickle.load(f)
    with open('cache/custom_contents.%s.pkl' % n, 'rb') as f:
        custom_contents = np.array(pickle.load(f))
    with open('cache/s.custom_contents.pkl', 'rb') as f:
        submission_custom_contents = np.array(pickle.load(f))
    with open('cache/s.tagged.pkl', 'rb') as f:
        submission_tags = pickle.load(f)

    print('Reading %s data' % n)
    data = data_io.read_data(train_path, n)
    submission_data = data_io.read_data(submission_path, 10000)  # use all

    # Normalized document text and integer labels for the training rows.
    contents = get_doc_contents(data[:, 2])
    contents = [features.normalize(content) for content in contents]
    Y = data[:, -1].astype(int)
    bestwords = get_bestwords(contents, Y, 100000, n)

    submission_contents = get_doc_contents(submission_data[:, 2])
    submission_contents = [features.normalize(submission_content)
                           for submission_content in submission_contents]
    X_submission_ids = submission_data[:, 1]

    v = TfidfVectorizer(min_df=2, binary=True, norm='l2', smooth_idf=True,
                        sublinear_tf=True, strip_accents='unicode',
                        vocabulary=bestwords, ngram_range=(1, 2))
    X = v.fit_transform(contents)