Beispiel #1
0
def main():
    """Train MODEL on the numerical-pair rows of one or more datasets
    and persist the fitted model to ``nnmodel.pkl``.

    Usage: ``script [primary_set] [extra_set ...]`` — defaults to the
    'train' set when no argument is given.
    """
    set1 = 'train' if len(sys.argv) < 2 else sys.argv[1]
    set2 = [] if len(sys.argv) < 3 else sys.argv[2:]
    train_filter2 = None

    model = MODEL(**MODEL_PARAMS)

    print("Reading in training data " + set1)
    train = data_io.read_data(set1)
    print("Extracting features")
    train = model.extract(train)
    print("Saving train features")
    data_io.write_data(set1, train)
    target = data_io.read_target(set1)

    # Accumulate any extra datasets into a single frame/target pair.
    train2 = None
    target2 = None
    for s in set2:
        # Python 3 print() calls (original mixed py2 print statements here).
        print("Reading in training data", s)
        tr = data_io.read_data(s)
        print("Extracting features")
        tr = model.extract(tr)
        print("Saving train features")
        data_io.write_data(s, tr)
        tg = data_io.read_target(s)
        train2 = tr if train2 is None else pd.concat(
            (train2, tr), ignore_index=True)
        target2 = tg if target2 is None else pd.concat(
            (target2, tg), ignore_index=True)
    if train2 is not None:
        # Hoisted out of the loop: only the final accumulated frame needs
        # shuffling and filter construction (the original redid both on
        # every iteration, with identical end result).
        train2, target2 = util.random_permutation(train2, target2)
        train_filter2 = ((train2['A type'] == 'Numerical') &
                         (train2['B type'] == 'Numerical'))

    # Data selection: keep only rows where both columns are numerical.
    train, target = util.random_permutation(train, target)
    train_filter = ((train['A type'] == 'Numerical') &
                    (train['B type'] == 'Numerical'))

    train = train[train_filter]
    target = target[train_filter]
    if train_filter2 is not None:
        train2 = train2[train_filter2]
        target2 = target2[train_filter2]

    print("Training model with optimal weights")
    X = pd.concat([train, train2]) if train2 is not None else train
    y = (np.concatenate((target.Target.values, target2.Target.values))
         if target2 is not None else target.Target.values)
    model.fit(X, y)
    model_path = "nnmodel.pkl"
    print("Saving model", model_path)
    data_io.save_model(model, model_path)
Beispiel #2
0
def main():
    """Train MODEL on mixed-type (non-numerical A, numerical B) rows of
    one or more datasets and persist the fitted model to ``cnmodel.pkl``.

    Usage: ``script [primary_set] [extra_set ...]`` — defaults to the
    'train' set when no argument is given.
    """
    set1 = 'train' if len(sys.argv) < 2 else sys.argv[1]
    set2 = [] if len(sys.argv) < 3 else sys.argv[2:]
    train_filter2 = None

    model = MODEL(**MODEL_PARAMS)

    print("Reading in training data " + set1)
    train = data_io.read_data(set1)
    print("Extracting features")
    train = model.extract(train)
    print("Saving train features")
    data_io.write_data(set1, train)
    target = data_io.read_target(set1)

    # Accumulate any extra datasets into a single frame/target pair.
    train2 = None
    target2 = None
    for s in set2:
        # Python 3 print() calls (original mixed py2 print statements here).
        print("Reading in training data", s)
        tr = data_io.read_data(s)
        print("Extracting features")
        tr = model.extract(tr)
        print("Saving train features")
        data_io.write_data(s, tr)
        tg = data_io.read_target(s)
        train2 = tr if train2 is None else pd.concat(
            (train2, tr), ignore_index=True)
        target2 = tg if target2 is None else pd.concat(
            (target2, tg), ignore_index=True)
    if train2 is not None:
        # Hoisted out of the loop: only the final accumulated frame needs
        # shuffling and filter construction (the original redid both on
        # every iteration, with identical end result).
        train2, target2 = util.random_permutation(train2, target2)
        train_filter2 = ((train2['A type'] != 'Numerical') &
                         (train2['B type'] == 'Numerical'))

    # Data selection: keep rows where A is NOT numerical but B is.
    train, target = util.random_permutation(train, target)
    train_filter = ((train['A type'] != 'Numerical') &
                    (train['B type'] == 'Numerical'))

    train = train[train_filter]
    target = target[train_filter]
    if train_filter2 is not None:
        train2 = train2[train_filter2]
        target2 = target2[train_filter2]

    print("Training model with optimal weights")
    X = pd.concat([train, train2]) if train2 is not None else train
    y = (np.concatenate((target.Target.values, target2.Target.values))
         if target2 is not None else target.Target.values)
    model.fit(X, y)
    model_path = "cnmodel.pkl"
    print("Saving model", model_path)
    data_io.save_model(model, model_path)
def main():
    """Run the read/simulate pipeline, timing it in debug mode.

    NOTE(review): in debug mode results are never written —
    ``data_io.write_data()`` only runs in the non-debug branch.
    Presumably intentional (avoid clobbering output while debugging);
    confirm with the pipeline owner.
    """
    if data.IS_DEBUG:
        import time  # local import: timing is only needed in debug mode
        t1 = time.time()

    data_io.read_data()
    simulations.start()

    if data.IS_DEBUG:
        # `time` is already bound above (same IS_DEBUG condition); the
        # original re-imported it here redundantly.
        t2 = time.time()
        print(f'Time = {t2 - t1}')
    else:
        data_io.write_data()
Beispiel #4
0
def view():
    """Interactive console menu for managing a book list.

    Loads ``book_list.txt`` if present (falls back to an empty list)
    and loops over a numbered menu: register, list, delete, update,
    search, lookup-by-number, or exit.  Menu option 7 saves the list
    back to ``book_list.txt`` before exiting; any other unknown option
    terminates the process.
    """
    try:
        info_arr = data.read_data("book_list.txt")
    except Exception:  # no saved data yet (or unreadable) — start fresh
        print("No data exists")
        info_arr = []

    while True:
        print(
            "========= 도서 관리 system ========= \n1. 신규 도서 등록\n2. 보유 도서 조회\n3. 도서 삭제\n4. 도서 정보 수정\n5. 특정 도서 정보 조회\n6. 도서 등록번호 조회\n7. 종료"
        )
        print("==========================================")
        menu = int(input("메뉴를 선택하세요: "))
        if menu == 1:
            # Register the requested number of books one at a time.
            num = int(input("등록할 도서 갯수: "))
            for _ in range(num):
                info_arr.append(create.register())
        elif menu == 2:
            read.ai_list(info_arr)
        elif menu == 3:
            del_name = input("삭제할 도서 이름: ")
            delete.ai_remove(del_name, info_arr)
        elif menu == 4:
            update_name = input("수정할 도서 이름: ")
            update.ai_update(update_name, info_arr)
        elif menu == 5:
            get_name = input("조회할 도서 이름: ")
            print("해당 도서 정보: ", read.ai_search(get_name, info_arr))
        elif menu == 6:
            get_name = input("조회할 도서 이름: ")
            print("해당 도서 등록 번호: ", read.ai_num(get_name, info_arr))
        elif menu == 7:
            print("시스템 종료")
            data.save_list("book_list.txt", read.ai_list(info_arr))
            break
        else:
            print("no proper number: 시스템 종료")
            exit()
if __name__ == '__main__':
    print datetime.datetime.now()
    
    np.random.seed(1500) # 500 is original

    train_path = 'data/train.tsv'
    submission_path = 'data/test.tsv'
    
    n = 10000
    tags = pickle.load(open('cache/tagged.%s.pkl' % (n)))
    custom_contents = np.array(pickle.load(open('cache/custom_contents.%s.pkl' % (n))))
    submission_custom_contents = np.array(pickle.load(open('cache/s.custom_contents.pkl')))
    submission_tags = pickle.load(open('cache/s.tagged.pkl'))
    
    print 'Reading %s data' % (n)
    data = data_io.read_data(train_path, n)
    submission_data = data_io.read_data(submission_path, 10000) # use all
    
    contents = get_doc_contents(data[:, 2])
    contents = [ features.normalize(content) for content in contents ]
    
    Y = data[:, -1].astype(int)
    
    bestwords = get_bestwords(contents, Y, 100000, n)
    
    submission_contents = get_doc_contents(submission_data[:, 2])
    submission_contents = [ features.normalize(submission_content) for submission_content in submission_contents ]
    X_submission_ids = submission_data[:, 1]
    
    v = TfidfVectorizer(min_df = 2, binary = True, norm = 'l2', smooth_idf = True, sublinear_tf = True, strip_accents = 'unicode', vocabulary = bestwords, ngram_range = (1,2))
    X = v.fit_transform(contents)