def grid_search(pipeline, train_path, test_path):
    parameters = {
        'clf__C': (1, 10, 20),
        'fs__k': (20000, 500000, 100000, 300000)
    }
    X_train, y_train = load_dataset(train_path)
    X_test, y_test = load_dataset(test_path)
    target_names = list(set([i[0] for i in y_train]))
    print("%d documents (training set)" % len(X_train))
    print("%d documents (test set)" % len(X_test))
    print("%d categories" % len(target_names))
    print()

    gridsearch = GridSearchCV(pipeline, parameters, cv=5, n_jobs=-1, verbose=1)

    print("Performing grid search...")
    print("pipeline:", [name for name, _ in pipeline.steps])
    print("parameters:")
    print(parameters)
    t0 = time()
    gridsearch.fit(X_train, y_train)
    print("done in %0.3fs" % (time() - t0))
    print()

    params = []
    print("Best dev score: %0.3f" % gridsearch.best_score_)
    print("Best parameters set:")
    best_parameters = gridsearch.best_estimator_.get_params()
    for param_name in sorted(parameters.keys()):
        print("\t%s: %r" % (param_name, best_parameters[param_name]))
        params.append((param_name, best_parameters[param_name]))
    print("Best test score: %0.3f" % gridsearch.score(X_test, y_test))
Example #2
def main():
    '''
        Combine all the operations together.
    '''
    # 1 define the paths
    FILE_DIRECTORY = "hdfs://master:9000/user/hadoop/fault_diagnosis/dataset/"
    TRAIN_PATH = FILE_DIRECTORY + "train_set.csv"
    VALIDATE_PATH = FILE_DIRECTORY + "validate_set.csv"

    train_set = load_dataset(TRAIN_PATH)
    validate_set = load_dataset(VALIDATE_PATH)
    # 2 define the model
    
    PCA_model = pca_opreator(2)
    rf_model = random_forest_opreator()
    pipeline = Pipeline(stages=[PCA_model, rf_model])
    model = pipeline.fit(train_set)
    # 3 transform the data with the fitted pipeline
    train_dataframe = model.transform(train_set)
    validate_dataframe = model.transform(validate_set)

    validate_dataframe.show(10)
    #4 evaluate
    evaluator = MulticlassClassificationEvaluator(labelCol="indexed_label", predictionCol="prediction", metricName="accuracy")
    accuracy_train = evaluator.evaluate(train_dataframe)
    accuracy_validate = evaluator.evaluate(validate_dataframe)
    print("accuracy on train set = %g" % (accuracy_train))
    print("accuracy on validation set = %g" % (accuracy_validate))
Example #3
def checkIndex(index):
    if index == '1':
        myName = input('Please insert your name:\n')
        print("Preparing for catching a pos image...")
        CatchFace("Catch A Pos Face", 500, './faceData/posFaceData')
        print("Pos model caught!")
        print("Continue?")
        conFlag = input('Y?:\n')
        if conFlag.upper() == 'Y':
            print("Preparing for catching a neg image...")
            CatchFace("Catch A Neg Face", 500, './faceData/negFaceData')
            print("Neg model caught!")
            print("Image loading, please wait...")
            load_dataset("./faceData")
            print("Data loaded!")
            print("Model crafting, please wait...")
            faceTrainMain(myName)
            clearFolder('./faceData/posFaceData')
            clearFolder('./faceData/negFaceData')
            print("Model Crafted!")
            showMenu()
        else:
            showMenu()
    elif index == '2':
        try:
            recFace()
        finally:
            showMenu()
    elif index == '3':
        print('Thank you for using!')
        exit()
    else:
        showMenu()
Example #4
    def update_dataset(self):
        create_dataset()
        output = load_dataset()
        count_all_images = len(os.listdir(base_dir / "Test_Sliced_Images"))

        predictions = {}

        image_count = 0

        for image, label in load_dataset():
            image = np.expand_dims(image, axis=2) / 255.
            prediction = self.model.predict(np.expand_dims(image, axis=0))

            image_count += 1
            print(f"[+] prediction {round(image_count / count_all_images * 100, 2)}%")

            if label not in predictions:
                predictions[label] = {"prediction": prediction, "count": 1}
                continue

            predictions[label]["prediction"] += prediction
            predictions[label]["count"] += 1

        for i in predictions:
            predictions[i] = (predictions[i]["prediction"] / predictions[i]["count"]).tolist()

        with open(base_dir / "Saved_Model/outputs.json", "w") as log:
            log.write(json.dumps(predictions, ensure_ascii=False))
def _load_train_test_data_helper():
    if FLAGS.tvt_options == 'all':
        dataset = load_dataset(FLAGS.dataset, 'all', FLAGS.align_metric,
                               FLAGS.node_ordering)
        dataset.print_stats()
        # Node feature encoding must be done at the entire dataset level.
        print('Encoding node features')
        dataset, num_node_feat = encode_node_features(dataset=dataset)
        print('Splitting dataset into train test')
        dataset_train, dataset_test = dataset.tvt_split(
            [FLAGS.train_test_ratio], ['train', 'test'])
    elif FLAGS.tvt_options == 'train,test':
        dataset_test = load_dataset(FLAGS.dataset, 'test', FLAGS.align_metric,
                                    FLAGS.node_ordering)
        dataset_train = load_dataset(FLAGS.dataset, 'train',
                                     FLAGS.align_metric, FLAGS.node_ordering)
        dataset_train, num_node_feat_train = \
            encode_node_features(dataset=dataset_train)
        dataset_test, num_node_feat_test = \
            encode_node_features(dataset=dataset_test)
        if num_node_feat_train != num_node_feat_test:
            raise ValueError('num_node_feat_train != num_node_feat_test '
                             '{} != {}'.format(num_node_feat_train,
                                               num_node_feat_test))
        num_node_feat = num_node_feat_train
    else:
        print(FLAGS.tvt_options)
        raise NotImplementedError()
    dataset_train.print_stats()
    dataset_test.print_stats()
    train_data = OurModelData(dataset_train, num_node_feat)
    test_data = OurModelData(dataset_test, num_node_feat)
    return train_data, test_data
Example #6
def train_svm():
    input_dir = 'data'
    train_dir = os.path.join(input_dir, 'processed_data', 'train')
    validation_dir = os.path.join(input_dir, 'processed_data', 'validation')

    trainX, trainy = load_dataset(train_dir)
    testX, testy = load_dataset(validation_dir)

    model = load_model(os.path.join('model', 'facenet_keras.h5'))

    trainX = get_embedded_data(model, trainX)
    testX = get_embedded_data(model, testX)

    trainX = normalize(trainX)
    testX = normalize(testX)

    label_encode = LabelEncoder()
    label_encode.fit(trainy)
    trainy = label_encode.transform(trainy)
    testy = label_encode.transform(testy)

    np.save(os.path.join('model', 'classes.npy'), label_encode.classes_)

    model = get_svm_model(trainX, trainy)
    filename = os.path.join('model', 'svm_model.sav')
    joblib.dump(model, filename)
    print("SVM model saved!")

    pred_train = model.predict(trainX)
    pred_test = model.predict(testX)
    score_train = accuracy_score(trainy, pred_train)
    score_test = accuracy_score(testy, pred_test)

    print("Accuracy\nTrain : ", score_train, "\n", "Test : ", score_test)
Example #7
def main():
    X_train, y_train = load_dataset(
        '../data/disaster_response_messages_training.csv', 'weather_related')
    X_val, y_val = load_dataset(
        '../data/disaster_response_messages_training.csv', 'weather_related')
    X_test, y_test = load_dataset(
        '../data/disaster_response_messages_test.csv', 'weather_related')

    # get annotated labels
    cats_train = get_categories(y_train)
    cats_val = get_categories(y_val)
    cats_test = get_categories(y_test)

    train_cnn(X_train, cats_train, X_val, cats_val, X_test, cats_test, 10)
Example #8
def test_load_dataset():
    """
    Test loading the traffic datasets
    """
    from load_data import load_dataset
    import os
    zhanqian = load_dataset(os.getcwd() + '/../data/zhanqian.csv')
    huizhou = load_dataset(os.getcwd() + '/../data/zhonglou.csv')
    ningguo = load_dataset(os.getcwd() + '/../data/ningguo.csv')
    xiyou = load_dataset(os.getcwd() + '/../data/xiyou.csv')
    assert not zhanqian.empty
    assert not huizhou.empty
    assert not ningguo.empty
    assert not xiyou.empty
def _plot_pairs(pairs, dataset_name, num_pairs, fix_match_pos, dir, want,
                want_gid_tuples, need_eps, mode, pick_best):
    dataset = load_dataset(dataset_name, 'all', 'mcs', 'bfs')
    dataset.print_stats()
    natts, *_ = get_dataset_conf(dataset_name)
    node_feat_name = natts[0] if len(
        natts) >= 1 else None  # TODO: only one node feat
    if pairs is None:
        pairs = dataset.get_all_pairs()
    pairs, num_pairs = _filter_pairs(pairs, num_pairs, want_gid_tuples)
    assert num_pairs >= 1 and len(pairs) >= num_pairs, '{} {}'.format(
        num_pairs, len(pairs))
    all_pair_gid_tuples = sorted(pairs.keys())
    random.Random(123).shuffle(all_pair_gid_tuples)
    for i in range(num_pairs):
        gid1, gid2 = all_pair_gid_tuples[i]
        # if gid1 != 106:
        #     continue
        # else:
        #     pass
        g1 = dataset.look_up_graph_by_gid(gid1).get_nxgraph()
        g2 = dataset.look_up_graph_by_gid(gid2).get_nxgraph()
        fnb = '{}_{}_{}'.format(dataset_name, g1.graph['gid'], g2.graph['gid'])
        _plot_pairs_helper(fnb, g1, g2, pairs, node_feat_name, dataset, gid1,
                           gid2, fix_match_pos, dir, want, need_eps, mode,
                           pick_best)
Example #10
def main():
    # get nearest neighbor count
    if len(sys.argv) < 2:
        print("Please enter number of nearest neighbor:")
        no_of_nearest_neighbor = input()
    else:
        no_of_nearest_neighbor = sys.argv[1]

    # load dataset
    print(
        "Please put the dataset in the data folder (named test.txt and train.txt)")
    train_set, test_set = load_dataset()

    print("Length of test set is %d and length of train set is %d" %
          (len(train_set), len(test_set)))

    # predictions
    p = prediction(train_set, test_set)
    print(
        "Select option- 1: Get Accuracy from test set, 2: Get Sentimental Analysis"
    )
    option = int(input())
    if option == 1:
        print(p.get_accuracy(no_of_nearest_neighbor))
    elif option == 2:
        print('Input string to get sentiments:')
        input_data = input()
        print(p.get_sentiments(input_data, no_of_nearest_neighbor))
    else:
        print("Invalid option")
Example #11
def test_cbir():
    x_train, x_test, labels = load_dataset(os.getcwd(), False)
    encoder_model = De_Conv_Autoencoder()
    encoder_model.build_auto_encoder()
    encoder_model.compile()
    encoder_model.load()

    query_img = img_as_float(cv2.cvtColor(cv2.resize(cv2.imread( "test_img.jpg"), (124,124)), cv2.COLOR_BGR2GRAY))
    # print(type(query_img))
    print(query_img.shape)
    img_feat_vect  = extract_feat_query(encoder_model, query_img)
    predicted_cluster = cluster_query_img(img_feat_vect)

    plt.subplot(2, 1, 1)
    plt.imshow(query_img.reshape(124, 124))
    plt.gray()
    #plt.set_axis_off()
    plt.savefig("sample.jpg")  # save before show()/close(), otherwise the figure is empty
    plt.show()
    plt.close()

    # initialize feature index
    index = FeaturesIndex()
    index.load()
    for k in index.feat_index:
        print(k," ---> ", index.feat_index[k])
    print(predicted_cluster)
    imgs_in_cluster = index.feat_index.get(predicted_cluster[0])
    print(list(set(imgs_in_cluster)))
    print(len(list(set(imgs_in_cluster))))
    plot_sample_imgs(x_train, list(set(imgs_in_cluster)))
Example #12
def main():
    train_set = []
    for f in ["train.txt", "dev.txt"]:
        file = join(dirname(dirname(dirname(__file__))), "data", "vlsp2016",
                    "corpus", f)
        train_set += load_dataset(file)

    train_set = train_set[:100]

    start = time.time()
    transformer = tagged.TaggedTransformer(template)
    X1, y1 = transformer.transform(train_set)
    end = time.time()
    py = end - start
    # py = 2.34531

    start = time.time()
    transformer = tagged_cython.TaggedTransformer(template)
    X2, y2 = transformer.transform(train_set)
    end = time.time()
    cy = end - start

    print("Python:", py)
    print("Cython:", cy)
    print("Cython is {:0.3f}x faster ^-^".format(py / cy))
Example #13
def main():
    vocab, train_dataset, val_dataset, test_dataset = load_dataset(args.train_file, args.val_file, args.test_file)
    glove_twitter = nlp.embedding.create('glove', source=args.embedding_source, unknown_token='<unk>',
                                         init_unknown_vec=mx.nd.random_uniform)
    vocab.set_embedding(glove_twitter)
    ctx = mx.cpu()  ## or mx.gpu(N) if GPU device N is available

    train_classifier(vocab, train_dataset, val_dataset, test_dataset, ctx)
Example #14
def run_model(model_name):

    vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset(
    )
    learning_rate = config.learning_rate
    batch_size = config.batch_size
    output_size = config.output_size
    hidden_size = config.hidden_size
    embedding_length = config.embedding_length

    epochs = config.epochs

    in_channels = config.in_channels
    out_channels = config.out_channels
    kernel_heights = config.kernel_heights
    stride = config.stride
    padding = config.padding
    keep_probab = config.keep_probab

    if model_name == 'CNN':
        model = CNN.CNN(batch_size, output_size, in_channels, out_channels,
                        kernel_heights, stride, padding, keep_probab,
                        vocab_size, embedding_length, word_embeddings)

    elif model_name == 'LSTM':
        model = LSTM_Attn.AttentionModel(batch_size, output_size, hidden_size,
                                         vocab_size, embedding_length,
                                         word_embeddings)

    loss_fn = F.cross_entropy
    path = "Saved Models/"
    for epoch in range(epochs):
        train_loss, train_acc = train_model(model, train_iter, epoch, loss_fn)
        val_loss, val_acc, y_test, y_pred = eval_model(model, valid_iter,
                                                       loss_fn)
        _, f, o = helper.getResult(y_test, y_pred)
        current_f1 = f['f1-score']
        checkpoint_model(model, path, current_f1, epoch + 1, model_name, 'max')
        print(
            f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, Val. Acc: {val_acc:.2f}%'
        )

    load_saved_model(model, path + '{}_best.pth'.format(model_name))
    test_loss, test_acc, y_test, y_pred = eval_model(model, test_iter, loss_fn)
    print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')

    print(
        "                                Overall               #               Fake                "
    )
    print(
        "                   precision    recall      f1-score  #  precision    recall      f1-score"
    )
    _, f, o = helper.getResult(y_test, y_pred)
    res = helper.printResult(model_name, o, f)
    print(res)
    path = model_name + "_results.txt"
    helper.saveResults(path, res)
Example #15
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
             img_channels=3):
        # load the dataset into memory
        images, labels, face_num = load_dataset(self.path_name)
        self.nb_classes = face_num

        train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size=0.3,
                                                                                  random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(images, labels, test_size=0.5,
                                                          random_state=random.randint(0, 100))

        # If the current dimension ordering is 'th', image data is fed in the
        # order channels, rows, cols; otherwise rows, cols, channels.
        # This block reorganizes the training data into the order Keras expects.
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # print the sizes of the training, validation and test sets
        # (dedented out of the else branch so it runs for either ordering)
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        '''
        Our model uses categorical_crossentropy as its loss function, so the class
        labels must be one-hot encoded according to the number of classes nb_classes.
        Here there are only two classes, so the label data becomes two-dimensional.
        '''
        train_labels = np_utils.to_categorical(train_labels, self.nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, self.nb_classes)
        test_labels = np_utils.to_categorical(test_labels, self.nb_classes)

        # convert pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # normalize pixel values to the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE,
             img_channels=3, nb_classes=2):
        # load the dataset into memory
        images, labels = load_dataset(self.path_name)

        train_images, valid_images, train_labels, valid_labels = train_test_split(images, labels, test_size=0.3, random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(images, labels, test_size=0.5, random_state=random.randint(0, 100))

        # If the current dimension ordering is 'th', image data is fed in the
        # order channels, rows, cols; otherwise rows, cols, channels.
        # This block reorganizes the training data into the order Keras expects.
        # if K.image_dim_ordering() == 'th':
        if K.image_data_format() == "channels_first":  # newer Keras API
            train_images = train_images.reshape(train_images.shape[0], img_channels, img_rows, img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0], img_channels, img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
            valid_images = valid_images.reshape(valid_images.shape[0], img_rows, img_cols, img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # print the sizes of the training, validation and test sets
        # (dedented out of the else branch so it runs for either ordering)
        print(train_images.shape[0], 'train samples')
        print(valid_images.shape[0], 'valid samples')
        print(test_images.shape[0], 'test samples')

        # Our model uses categorical_crossentropy as its loss function, so the class
        # labels must be one-hot encoded according to the number of classes nb_classes.
        # Here there are only two classes, so the label data becomes two-dimensional.
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)

        # convert pixel data to float so it can be normalized
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')

        # normalize pixel values to the 0-1 range
        train_images /= 255
        valid_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.valid_images = valid_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.valid_labels = valid_labels
        self.test_labels = test_labels
def _test_helper_load(dataset_name, log_folder):
    # Load pairwise results including node-node matching matrix,
    log_folder = join(get_model_path(), 'Our', 'logs', log_folder)
    ld = load(join(log_folder, 'final_test_pairs.klepto'))
    pairs = ld['test_data_pairs']
    print(len(pairs), 'pairs loaded')
    # Load graphs.
    dataset = load_dataset(dataset_name, 'all', 'mcs', 'bfs')  # TODO: check bfs assumption
    dataset.print_stats()
    natts, *_ = get_dataset_conf(dataset_name)
    # node_feat_name = natts[0] if len(natts) >= 1 else None  # TODO: only one node feat
    from node_feat import encode_node_features
    dataset, _ = encode_node_features(dataset)
    # TODO: should really load and reset flags but since encode_node_features only uses 'one_hot' it is fine for now
    return pairs, dataset
Example #18
def runFromScratch():
    # fix random seed for reproducibility
    seed = 7
    numpy.random.seed(seed)

    evaluation_dir = '/home/himanshu/Anjali/experiments/e80_20/evaluation'

    print('Loading dataset...')
    X_train, y_train, X_test, y_test = load_dataset(threshold=0.2)
    # seaborn.countplot(y_train)
    # seaborn.countplot(y_test)

    # prepare the image for the VGG model
    X_train, X_test = preprocessDataset(X_train, X_test)

    model = defineModel()

    # compile model
    optimizr = optimizers.Adam(lr=0.0001)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizr,
                  metrics=['accuracy'])
    # model.compile( loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', mean_pred] )

    # fit model
    numEpochs = 20
    batchSize = 12
    print('Fitting the model...')
    createLogFile(evaluation_dir)
    csv_logger = CSVLogger(path.join(evaluation_dir, 'log.csv'),
                           append=True,
                           separator=';')
    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_test, y_test),
                        epochs=numEpochs,
                        batch_size=batchSize,
                        verbose=1,
                        callbacks=[csv_logger])

    # save model architecture and weights
    saveModelArchWeights(model, evaluation_dir)

    # save plots to disk
    savePlots(history, evaluation_dir)

    # evaluate model & print accuracy
    evaluateModel(X_test, y_test, model)
Example #19
def main():
    x_train, y_train, x_test, y_test = load_dataset()
    print('Training data shape: ', x_train.shape, '     Train labels shape: ',
          y_train.shape)
    print('Test data shape:     ', x_test.shape, '     Test labels shape: ',
          y_test.shape)
    print()

    classifier = SoftmaxClassifier()
    loss, grad = classifier.cross_entropy_loss(x_train, y_train, 1e-5)

    # Gradient check for the model
    f = lambda w: classifier.cross_entropy_loss(x_train, y_train, 0.0)[0]
    print('Gradient Check:')
    grad_check(f, classifier.W, grad, 10)
    print()

    # Plot the loss for the training
    loss_record = classifier.train(x_train, y_train, lr=1e-6, reg=1e4)
    plt.plot(loss_record)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()

    # Evaluation on test set
    y_test_pred = classifier.predict(x_test)
    accuracy = np.mean(y_test == y_test_pred)
    print('Accuracy of the Softmax classifier on the test set: %f' % accuracy)

    # Visualize the learned weights for each class
    w = classifier.W[:, :-1]  # Strip out the bias
    w = w.reshape(10, 32, 32, 3)

    w_min, w_max = np.min(w), np.max(w)

    classes = [
        'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship',
        'truck'
    ]
    for i in range(10):
        plt.subplot(2, 5, i + 1)

        # Rescale the weights to be between 0 and 255 for image representation
        w_img = 255.0 * (w[i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(w_img.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    plt.show()
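For reference, a minimal numeric gradient check in the spirit of the grad_check() call above; the exact signature of the author's helper is an assumption.

import numpy as np

def numeric_grad_check(f, w, analytic_grad, num_checks=10, h=1e-5):
    # compare a few randomly chosen analytic gradient entries against
    # centered finite differences of the loss f(w)
    for _ in range(num_checks):
        ix = tuple(np.random.randint(d) for d in w.shape)
        old = w[ix]
        w[ix] = old + h
        fxph = f(w)
        w[ix] = old - h
        fxmh = f(w)
        w[ix] = old
        grad_numeric = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_err = abs(grad_numeric - grad_analytic) / (
            abs(grad_numeric) + abs(grad_analytic) + 1e-12)
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numeric, grad_analytic, rel_err))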
Example #20
    def load(self, img_rows=IMAGE_SIZE, img_cols=IMAGE_SIZE, img_channels=3):
        # 装载数据集
        images, labels, face_num = load_dataset(self.path_name)
        self.nb_classes = face_num

        # 按交叉验证的原则分割训练集和测试集
        train_images, _, train_labels, _ = train_test_split(
            images, labels, test_size=0.1, random_state=random.randint(0, 100))
        _, test_images, _, test_labels = train_test_split(
            images, labels, test_size=0.1, random_state=random.randint(0, 100))

        # 判断后端系统:TensorFlow/Theano,重组训练数据集
        if K.image_data_format() == 'channels_first':
            train_images = train_images.reshape(train_images.shape[0],
                                                img_channels, img_rows,
                                                img_cols)
            test_images = test_images.reshape(test_images.shape[0],
                                              img_channels, img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            train_images = train_images.reshape(train_images.shape[0],
                                                img_rows, img_cols,
                                                img_channels)
            test_images = test_images.reshape(test_images.shape[0], img_rows,
                                              img_cols, img_channels)
            self.input_shape = (img_rows, img_cols, img_channels)

        # 输出训练集和测试集的数量
        print(train_images.shape[0], 'train samples')
        print(test_images.shape[0], 'test samples')

        # 根据类别数量nb_classes将类别标签进行one-hot编码使其向量化
        train_labels = np_utils.to_categorical(train_labels, self.nb_classes)
        test_labels = np_utils.to_categorical(test_labels, self.nb_classes)

        # 像素数据浮点化以便归一化
        train_images = train_images.astype('float32')
        test_images = test_images.astype('float32')

        # 归一化,图像的各像素值归一化到0~1区间
        train_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.test_images = test_images
        self.train_labels = train_labels
        self.test_labels = test_labels
Example #21
def main():
    """Defines an application's main functionality"""

    log.info("Started.")

    base_path = Path(config.base_dir)
    if base_path.exists() is False:
        base_path.mkdir(exist_ok=True)

    (x_train, y_train), (x_test,
                         y_test), (word2id,
                                   id2word), (tag2id, id2tag) = load_dataset(
                                       config.corpus,
                                       test_ratio=0.1,
                                       data_dir=config.base_dir)
    voc_size = len(word2id)
    num_tags = len(id2tag)

    y_train_oh = keras.utils.np_utils.to_categorical(y_train, num_tags)
    y_test_oh = keras.utils.np_utils.to_categorical(y_test, num_tags)

    log.info("Data information")
    log.info("Size of training set: %d" % (x_train.shape[0]))
    log.info("Shape of training set: %s" % (repr(x_train.shape)))
    log.info("Size of test set: %d" % (x_test.shape[0]))
    log.info("Number of unique wordss: %d" % (len(word2id)))
    log.info("Number of unique tags: %d" % (num_tags))
    log.info("Weights path: %s" % (config.weights_path))

    if config.use_embedding is False:
        model = model_architecture.build_model(num_tags)
    else:
        model = model_architecture.build_model_with_embedding(
            num_tags, voc_size, config.sample_dimension)

    model.compile(optimizer='adam',
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['categorical_accuracy'])

    model.fit(x=x_train,
              y=y_train_oh,
              validation_data=(x_test, y_test_oh),
              batch_size=128,
              epochs=config.epochs,
              verbose=1)  # Use progress bar

    model.save_weights(config.weights_path)
def build_index():

    de_conv_encoder = De_Conv_Autoencoder()
    de_conv_encoder.build_auto_encoder()
    de_conv_encoder.compile()
    de_conv_encoder.load()

    x_train, x_test, labels = load_dataset(os.getcwd(), False)
    feat_vect_list = []
    #for i in range(len(x_train)):
    feat_vect_list = extract_feature(de_conv_encoder, x_train)

    n_clusters = 100
    km_model = trigger_clustering(labels,
                                  np.array(feat_vect_list),
                                  count=n_clusters)
    Index_histogram(km_model, n_clusters)
def set_up(filename, melt=True):
    data = load_data.load_dataset(filename)
    col_names = {
        0: "Fz",
        1: "C3",
        2: "Cz",
        3: "C4",
        4: "CP1",
        5: "CPz",
        6: "CP2",
        7: "Pz"
    }
    df = pd.DataFrame.from_dict(data["y"])
    df.rename(columns=col_names, inplace=True)
    df_trig = pd.DataFrame.from_dict(data["trig"])
    df_trig.rename(columns={0: "trigger"}, inplace=True)
    df_full = pd.concat([df_trig, df.reindex(df.index)], axis=1)
    df_full.reset_index(inplace=True)
    df_full.rename(columns={'index': 'time'}, inplace=True)
    df_full['filename'] = filename
    df_full['subject'] = filename[0:2]
    df_full['condition'] = filename[3:].replace(".mat", "")
    df_full['data_quality'] = df_full['condition'].apply(
        lambda x: 'high' if 'high' in x else 'low')

    triggers = return_trig_dict(data['trig'])
    df_full['seq_type'] = df_full['time'].apply(
        lambda x: get_sequence_info(triggers, x, 'seq_type'))
    df_full['seq_index'] = df_full['time'].apply(
        lambda x: get_sequence_info(triggers, x, 'seq_index'))
    df_full['seq_start'] = df_full['time'].apply(
        lambda x: get_sequence_info(triggers, x, 'seq_start'))
    df_full['seq_time'] = df_full['time'] - df_full['seq_start']

    if melt == True:
        df_full = pd.melt(
            df_full,
            id_vars=[
                'time', 'trigger', 'filename', 'subject', 'condition',
                'data_quality', 'seq_type', 'seq_index', 'seq_start',
                'seq_time'
            ],
            value_vars=['Fz', 'C3', 'Cz', 'C4', 'CP1', 'CPz', 'CP2', 'Pz'],
            var_name='electrode',
            value_name='amplitude')
    return df_full
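Hypothetical usage of set_up(); the filename below is a placeholder that follows the slicing assumptions above (characters 0-1 as the subject id, everything after position 3 as the condition, with a .mat suffix).

df_long = set_up("01_high_quality.mat", melt=True)
print(df_long[['subject', 'condition', 'electrode', 'amplitude']].head())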
Example #24
def main():

    load_checkpoint = "./bin/2021-Apr-21-20-53-26-lstm2/E9.pytorch"

    batch_size = 32
    output_size = 2
    hidden_size = 256
    embedding_length = 300

    num_samples = 1

    TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset(
    )

    model = LSTMClassifier2(batch_size, output_size, hidden_size, vocab_size,
                            embedding_length, word_embeddings)

    if not os.path.exists(load_checkpoint):
        raise FileNotFoundError(load_checkpoint)

    model.load_state_dict(torch.load(load_checkpoint))
    print("Model loaded from %s" % load_checkpoint)

    if torch.cuda.is_available():
        model = model.cuda()

    model.eval()

    samples, z = model.inference(n=num_samples)

    print('----------SAMPLES----------')
    print(*idx2word(samples, i2w=TEXT.vocab.itos, pad_idx=model.pad_idx),
          sep='\n')
    exit()

    z1 = torch.randn([model.latent_size]).numpy()
    z2 = torch.randn([model.latent_size]).numpy()

    z = to_var(
        torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())

    samples, _ = model.inference(z=z)

    print('-------INTERPOLATION-------')
    print(*idx2word(samples, i2w=TEXT.vocab.itos, pad_idx=model.pad_idx),
          sep='\n')
Example #25
def main(train_data_path: str, model_path: str):
    TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset(
        train_data_path)

    batch_size = 32
    output_size = 2
    hidden_size = 256
    embedding_length = 300

    # TODO: try other types of learning algorithms
    model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings)

    for epoch in range(10):
        train_loss, train_acc = train_model(model, train_iter, epoch)
        val_loss, val_acc = eval_model(model, valid_iter)

        print(
            f'Epoch: {epoch + 1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, Val. Acc: {val_acc:.2f}%'
        )

    test_loss, test_acc = eval_model(model, test_iter)
    print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')
    ''' Let us now predict the sentiment on a single sentence just for the testing purpose. '''
    test_sen1 = "This is one of the best creation of Nolan. I can say, it's his magnum opus. Loved the soundtrack and especially those creative dialogues."

    test_sen1 = TEXT.preprocess(test_sen1)
    test_sen1 = [[TEXT.vocab.stoi[x] for x in test_sen1]]

    test_sen = np.asarray(test_sen1)
    test_sen = torch.from_numpy(test_sen)
    if torch.cuda.is_available():
        test_sen = test_sen.cuda()
    model.eval()
    output = model(test_sen, 1)
    out = F.softmax(output, 1)
    if (torch.argmax(out[0]) == 1):
        print("Sentiment: Positive")
    else:
        print("Sentiment: Negative")

    # save the model
    torch.save(model.state_dict(), model_path)
Example #26
def runSavedModel():
    # fix random seed for reproducibility
    seed = 7
    numpy.random.seed(seed)

    evaluation_dir = '/home/himanshu/Anjali/experiments/e10fold/evaluation'

    print('Loading dataset...')
    X, y = load_dataset()
    # seaborn.countplot(y)

    # runSavedModel() only evaluates a saved model, so (assumption) the loaded
    # data is treated as the test split here
    X_test, y_test = X, y

    # prepare the images for the ResNet50 model
    _, X_test = preprocessDataset(X_test, X_test)

    # load the model architecture and weights
    model = loadModelArchWeights(evaluation_dir)

    # compile model
    optimizr = optimizers.Adam(lr=0.001)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizr,
                  metrics=['accuracy'])
    # model.compile( loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', mean_pred] )

    # # fit model
    # numEpochs = 15
    # batchSize = 6
    # print('Fitting the model...')
    # createLogFile(evaluation_dir)
    # csv_logger = CSVLogger(path.join(evaluation_dir,'log.csv'), append=True, separator=';')
    # history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=numEpochs, batch_size=batchSize, verbose=1, callbacks=[csv_logger])
    # # save model architecture and weights
    # saveModelArchWeights(model, evaluation_dir)
    # # save plots to disk
    # savePlots(history, evaluation_dir)

    # evaluate model & print accuracy
    [y_predicted, y_probability, scores] = evaluateModel(X_test, y_test, model)

    # save predictions to disk
    savePredictions2disk(y_test, y_predicted, y_probability, evaluation_dir)
Example #27
def rerun_from_loaded_logs(dataset_name, log_folder, theta):
    from utils import get_model_path, load
    from load_data import load_dataset
    from pprint import pprint
    print('theta {}'.format(theta))

    log_folder = join(get_model_path(), 'Our', 'logs', log_folder)
    ld = load(join(log_folder, 'final_test_pairs.klepto'))
    pairs = ld['test_data_pairs']

    dataset = load_dataset(dataset_name, 'all', 'mcs', 'bfs')

    # regenerate y_true_dict_list
    for gids in pairs.keys():
        gid1, gid2 = gids
        g1 = dataset.look_up_graph_by_gid(gid1)
        g2 = dataset.look_up_graph_by_gid(gid2)
        pair_true = dataset.look_up_pair_by_gids(gid1, gid2)
        pair = pairs[gids]
        pair.assign_g1_g2(g1, g2)
        pair.assign_y_true_dict_list(pair_true.get_y_true_list_dict_view())

    # construct flags
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--only_iters_for_debug', type=int, default=None)
    parser.add_argument('--dataset', default=dataset_name)
    parser.add_argument('--align_metric', default='mcs')
    parser.add_argument('--theta', type=float, default=theta)
    parser.add_argument('--debug', type=bool, default='debug' in dataset_name)
    FLAGS = parser.parse_args()

    # call prediction code
    pair_list = [pairs[gids] for gids in pairs.keys()]
    global_result = eval_pair_list(pair_list, FLAGS)

    pprint(global_result)
    fn = join(log_folder, 'updated_results_theta_{}.txt'.format(theta))
    with open(fn, 'w') as f:
        pprint(global_result, stream=f)
Example #28
def main(args):

    TEXT, LABEL, vocab_size, word_embeddings, train_iter, valid_iter = load_data.load_dataset(
        args)

    #learning_rate = 2e-5
    learning_rate = 0.00001
    batch_size = BATCH_SIZE
    output_size = 2
    # hidden_size = 256
    hidden_size = 64
    embedding_length = 200

    #model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)
    model = AttentionModel(batch_size, output_size, hidden_size, vocab_size,
                           embedding_length, word_embeddings)
    #model = SelfAttention(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)
    #loss_fn = F.cross_entropy
    print(LABEL.vocab.stoi)
    print(LABEL.vocab.freqs)
    print(LABEL)
    label_weights = torch.FloatTensor(np.asarray([1.0, 2.0]))
    label_weights_tensor = Variable(label_weights, volatile=True).cuda()
    loss_fn = torch.nn.CrossEntropyLoss(weight=label_weights_tensor)
    #loss_fn = torch.nn.CrossEntropyLoss()

    for epoch in range(10):
        train_loss, train_acc = train_model(model, loss_fn, train_iter, epoch)
        val_loss, val_acc = eval_model(model, loss_fn, valid_iter)

        print(
            'Epoch: %d, Train Loss: %.3f, Train Acc: %.2f, Val Loss: %.3f, Val Acc: %.2f'
            % (epoch + 1, train_loss, train_acc, val_loss, val_acc))
        evaluate(model, TEXT, LABEL, args.train_data_tsv_file, epoch)
        evaluate(model, TEXT, LABEL, args.val_data_tsv_file, epoch)
        #torch.save(model.state_dict(), args.save_model_file+'.epoch'+str(epoch+1))

    test_loss, test_acc = eval_model(model, loss_fn, valid_iter)
    print('Test Loss: %.3f, Test Acc: %.2f' % (test_loss, test_acc))
Example #29
    def fileopen(self):
        global v1, clean_review, train, test
        filename = fdialog.askopenfilename(filetypes=(("TSV Files", "*.tsv"),
                                                      ("All Files", "*.*")))
        name = filename
        path = StringVar()
        path.set(name)
        self.Path_Field.configure(textvariable=path)
        try:
            file = io.StringIO()
            with redirect_stdout(file):
                obj_load = load_dataset(name)
                df = obj_load.load()
                obj_review = praproses(df['review'])
                df['review'] = obj_review.cleaning()
            output = file.getvalue()

            obj_split = fold(df)
            train, test = obj_split.train_test()

            self.Scrolledtext1.configure(state='normal')
            self.Scrolledtext1.insert(END, output)
            self.Scrolledtext1.configure(state='disabled')

            v1 = IntVar()
            self.BOW.configure(state='normal',
                               variable=v1,
                               value=1,
                               command=self.vsm)
            self.D2V.configure(state='normal',
                               variable=v1,
                               value=2,
                               command=self.vsm)
        except Exception:
            self.Scrolledtext1.configure(state='normal')
            self.Scrolledtext1.insert(
                END, "Tidak bisa membaca file yang dimuat.\n")  # "Unable to read the loaded file."
            self.Scrolledtext1.configure(state='disabled')
import os
import time

import theano
from theano import tensor as T
import numpy as np
from load_data import load_dataset
import lasagne
import matplotlib.pyplot as plt

# print("Loading data...")
# X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
# print(X_train.shape)
# print(y_train.shape)
# print(X_val.shape)
# print(y_val.shape)
# print(X_test.shape)
# print(y_test.shape)



print("Loading data...")
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
plt.figure(figsize=(12, 3))
for i in range(10):
    plt.subplot(1, 10, i + 1)
    plt.imshow(X_train[i].reshape((28, 28)), cmap='gray', interpolation='nearest')
    plt.axis('off')
# save once after all ten digit panels have been drawn
plt.savefig('digits.png', bbox_inches='tight')

Example #31
import warnings
from os.path import join, dirname

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

from load_data import load_dataset
from model import ClassifierModel

warnings.filterwarnings("ignore")

if __name__ == '__main__':
    train_path = join(dirname(dirname(__file__)), "data", "train.xlsx")
    test_path = join(dirname(dirname(__file__)), "data", "test.xlsx")
    X_train, y_train = load_dataset(train_path)
    X_test, y_test = load_dataset(test_path)

    models = [
        ClassifierModel("Tfidf Bigram", TfidfVectorizer(ngram_range=(1, 2))),
        ClassifierModel("Tfidf Trigram", TfidfVectorizer(ngram_range=(1, 3))),
        ClassifierModel("Count Bigram", CountVectorizer(ngram_range=(1, 2))),
        ClassifierModel("Count Trigram", CountVectorizer(ngram_range=(1, 3)))
    ]

    for n in [2000, 5000, 10000, 15000, 20000]:
        model = ClassifierModel("Count Max Feature {}".format(n),
                                CountVectorizer(max_features=n))
        models.append(model)

    for n in [2000, 5000, 10000, 15000, 20000]:
        model = ClassifierModel("Count Max Feature {}".format(n),
Example #32
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument('-n', '--num-epochs', type=int, default=20)
    parser.add_argument('-f', '--model-file', help="model file")
    parser.add_argument('--no-separate', help='do not split the data into two halves', action='store_true')
    parser.add_argument('--second-part', help='take second part of data instead of the first', action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-l', '--learning-rate', type=float, default=0.01)
    parser.add_argument('-t', '--test-only', action='store_true')
    parser.add_argument('-T', '--train-from-layer',
                        help="only train on this layer and those layers after it; "
                             "don't update weights of layers before this layer")
    parser.add_argument('-p', '--prefix', help='prefix to add at the beginning of model save file')

    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    save_file_name = model + '_model'
    test_only = args.test_only
    load_first_part = not args.second_part
    train_from_layer = args.train_from_layer
    prefix = args.prefix

    if test_only and not model_file:
        print('you need to specify a model file to test')
        exit()

    if separate:
        if load_first_part:
            save_file_name = 'firsthalf_' + save_file_name
        else:
            save_file_name = 'secondhalf_' + save_file_name
        nOutput = 5
    else:
        nOutput = 10

    if train_from_layer:
        save_file_name = 'from_' + train_from_layer + save_file_name

    if prefix:
        save_file_name = prefix + save_file_name
    else:
        save_file_name = str(random.randint(10000, 99999)) + '_' + save_file_name

    logfile = save_file_name + '_log.txt'
    log_print = functools.partial(log_and_print, logfile=logfile)
    log_print('--Parameter--')
    log_print('  model={}'.format(model))
    log_print('  batch_size={}'.format(batch_size))
    log_print('  num_epochs={}'.format(num_epochs))
    log_print('  learning_rate={}'.format(learning_rate))
    log_print('  separate data :{}'.format(separate))
    if separate:
        s = '    take first or second part of data :' + ('first' if load_first_part else 'second')
        log_print(s)
    log_print('  model_file :{}'.format(model_file))
    log_print('  nOutput = {}'.format(nOutput))
    log_print('  model will be saved to : {}'.format(save_file_name + '*.npz'))
    log_print('  log will be saved to : {}'.format(logfile))
    log_print('  test only :{}'.format(test_only))
    log_print('  only train from this layer : {}'.format(train_from_layer))
    log_print('  prefix to save file : {}'.format(prefix))

    log_print('')

    log_print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part)

    log_print('{} train images'.format(len(X_train)))
    log_print('{} val images'.format(len(X_val)))
    log_print('{} test images'.format(len(X_test)))

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    log_print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    prediction = lasagne.layers.get_output(net_output)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    if train_from_layer:
        layers_to_train = lasagne.layers.get_all_layers(net_output, treat_as_input=[net[train_from_layer]])
        params = get_all_params_from_layers(layers_to_train, trainable=True)
    else:
        params = lasagne.layers.get_all_params(net_output, trainable=True)

    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=0.9)

    test_prediction = lasagne.layers.get_output(net_output, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()

    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    if not test_only:
        log_print("Starting training...")

        for epoch in range(num_epochs):

            train_err = 0
            train_batches = 0
            start_time = time.time()
            print("Training stage:")
            for batch in load_data.iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                time_batch = time.time()
                inputs, targets = batch
                this_train_err = train_fn(inputs, targets)
                train_err += this_train_err
                train_batches += 1
                print('train batch', train_batches, 'err+=', this_train_err,
                      '{:.2f}'.format(time.time() - time_batch), 'seconds')

            val_err = 0
            val_acc = 0
            val_batches = 0
            print("Validation stage ..")
            for batch in load_data.iterate_minibatches(X_val, y_val, batch_size, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            # Then we print the results for this epoch:
            log1 = "Epoch {} of {} took {:.3f}m".format(epoch + 1, num_epochs, (time.time() - start_time) / 60.)
            log2 = "  training loss:\t\t{:.6f}".format(train_err / train_batches)
            log3 = "  validation loss:\t\t{:.6f}".format(val_err / val_batches)
            log4 = "  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100)
            log_print(log1)
            log_print(log2)
            log_print(log3)
            log_print(log4)

            # Optionally, you could now dump the network weights to a file like this:

            model_file = save_file_name + str(epoch) + '.npz'
            log_print('model saved to ' + model_file)
            model_io.save_model(model_file, net_output)

    log_print('testing network ...')
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in load_data.iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    log_print("Final results:")
    log_print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    log_print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
Example #33
import load_data as ld
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipdb
import matplotlib
matplotlib.style.use('ggplot')



csvfile = "../Exercise.xlsx"
df = ld.load_dataset(csvfile)
list_df = ld.separate_diet(df)

for diet in list_df:
    mean_time = diet.mean(axis=1).plot(label="Diet"+str(np.unique(diet["Diet"])))

plt.legend()
plt.savefig("mean.pdf")



plt.figure()    
for diet in list_df:
    diet_string = str(np.unique(diet["Diet"])[0])
    diet.to_csv("chicken_weights_"+diet_string+".csv")
    weights_colnames = list(diet.columns.values)
    weights_colnames.remove('Diet')
    diet = diet.reset_index()
    plt.figure()
    ax = diet.plot(kind="scatter", x="Time", y=0) 
Example #34
def main():
    """
    Main function of the project.
    """
    args = init_parser().parse_args()

    random.seed(490)

    dict_info = {}

    # Problem's definition
    dict_info['depot'] = model.Point(args.depot[0], args.depot[1])
    width, height = 300, 300
    ind_size = args.vehicle * args.node
    dict_info['zoom'] = args.zoom

    # Genetic parameter
    crossover_probability = args.crossover
    mutation_probability = args.mutation
    ngen = args.generation
    _mu = args.size
    _lambda = args.size

    # Generate the problem's data set,
    # i.e. generate N "routes" of appointments
    #list_appointment = model.generate_route(num_route,
    #        num_node_per_route,
    #        width,
    #        height,
    #        dict_info['depot'])

    dict_info['data'] = load_data.load_dataset(args.path)
    # Set the routes color
    dict_info['color'] = visualisation.color_group(args.vehicle)

    toolbox = init_toolbox(
            ind_size,
            args.vehicle,
            dict_info['data'],
            dict_info['depot'])

    # Create the global population
    # And an elite one

    pop = toolbox.population(n=args.size)
    hof = tools.HallOfFame(args.elite)

    # Create a statistic module to display stats at each generation
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", numpy.mean, axis=0)
    stats.register("std", numpy.std, axis=0)
    stats.register("min", numpy.min, axis=0)
    stats.register("max", numpy.max, axis=0)

    root = visualisation.Tk()
    root.geometry(str(width) + "x" + str(height))

    # The genetic algorithm in itself
    algorithms.eaMuPlusLambda(pop,
            toolbox,
            _mu,
            _lambda,
            crossover_probability,
            mutation_probability,
            ngen,
            stats=stats,
            halloffame=hof)

    dict_info['tour'] = visualisation.individual_as_appointment(
            hof[0],
            dict_info['data']['appointment']
            )

    # Create display of the problem and of the best solution
    visualisation.Example(root, dict_info)

    # Start the GUI main loop
    root.mainloop()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get output')
    parser.add_argument('--no-separate', help='do not split the data into two halves', action='store_true')
    parser.add_argument('--first-part', help='take first part of data instead of the second', action='store_true')
    parser.add_argument('-b', '--batch-size', type=int, default=64)
    parser.add_argument('-n', '--data-num', type=int)

    args = parser.parse_args()

    model = args.model
    batch_size = args.batch_size
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    load_first_part = args.first_part
    data_num = args.data_num

    filename = model + '_' + layer_name + '_output.save'
    print('--Parameters--')
    print('  model         : ', model)
    print('  layer name    : ', layer_name)
    print('  batch_size    : ', batch_size)
    print('  model_file    : ', model_file)
    print('  middle output will be saved to : ', filename)
    print('  separate data :', separate)
    if separate:
        print('    take first or second part of data :', 'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part)
    if data_num:
        X_train = X_train[:data_num]
        y_train = y_train[:data_num]
        X_val = X_val[:data_num]
        y_val = y_val[:data_num]
        X_test = X_test[:data_num]
        y_test = y_test[:data_num]

    print(len(X_train), 'train images')
    print(len(X_val), 'val images')
    print(len(X_test), 'test images')

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    # middle_output = theano.function([input_var], net[layer_name].output)
    print("Getting middle output...")

    output = lasagne.layers.get_output(net[layer_name])
    get_output = theano.function([input_var], output.flatten(2))

    output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name]))
    print('layer ' + layer_name + ' shape :', output_shape)

    all_train_output = []
    all_train_y = []
    all_test_output = []
    all_test_y = []
    print('getting from train')
    for batch in load_data.iterate_minibatches(X_train, y_train, batch_size, shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_train_output.extend(batch_output.tolist())
        all_train_y.extend(targets.tolist())
    print()
    print('getting from test')
    for batch in load_data.iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
        print('.', end='', flush=True)
        inputs, targets = batch
        batch_output = get_output(inputs)  # a numpy ndarray
        all_test_output.extend(batch_output.tolist())
        all_test_y.extend(targets.tolist())
    print()

    print("train output shape : ", np.array(all_train_output).shape)
    print("train y shape : ", np.array(all_train_y).shape)
    print("test output shape : ", np.array(all_test_output).shape)
    print("test y shape : ", np.array(all_test_y).shape)

    with open(filename, 'wb') as f:
        pickle.dump([all_train_output, all_train_y, all_test_output, all_test_y], f, protocol=pickle.HIGHEST_PROTOCOL)
    print('... saved to ', filename)
            lasagne.layers.dropout(network, p=.5),
            num_units=256,
            nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the 6-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=6,
            nonlinearity=lasagne.nonlinearities.softmax)

    return network


if __name__ == '__main__':
    print("Loading Data")
    X_train, y_train, X_valid, y_valid, X_test, y_test = load_data.load_dataset("/home/prosurpa/Image/image/")

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    print("Building Model")

    batch_size = 1

    network = build_simple_cnn(batch_size, input_var)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
Example #37
        else:
            tile_shape.append(1)
        #print tile_shape
    if untiled_param is not None: # use the untiled param to recover the updates
        sum_of_updates = T.sum(param - untiled_param, tying_dims, keepdims=1)
        updated_untiled_param = sum_of_updates + untiled_param
    else: # just take the mean value of the parameters (i.e. averaging instead of summing the updates)
        updated_untiled_param = T.mean(param, tying_dims, keepdims=1)
    updated_param = T.tile(updated_untiled_param, tile_shape, ndim=len(dims_shared))
    return updated_param, updated_untiled_param


#############
# LOAD DATA #
#############
train_x, train_y, test_x, test_y, input_shape = load_dataset(dataset)
input_shape = (1, input_shape[2], input_shape[0], input_shape[1], batchsize) # reshaped for locally_connected layers

if use_10percent_of_dataset:
    train_x *= 256
    test_x *= 256
    nex = 5000
    ntest = nex // 10
else:
    nex = 50000
    ntest = 10000
print("training on " + str(nex) + " examples")

train_x = train_x[:nex]
test_x = test_x[:ntest]
train_y = train_y[:nex]
Example #38
import load_data as ld
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipdb
import matplotlib
matplotlib.style.use('ggplot')



csvfile = "../Exercise.xlsx"
df_raw = ld.load_dataset(csvfile)

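# Fit a separate linear trend of Weight against Time for each diet group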
group = df_raw.groupby("Diet")


for i in range(1, 5):
    df = group.get_group(i)
    x = df["Time"]
    y = df["Weight"]
    results = np.polyfit(x, y, 1)
    print("For diet:", i, "slope=", results[0])
    xx = np.arange(0, 25)
    yy = results[1] + results[0]*xx
    plt.figure()
    df.plot(kind="scatter", x="Time", y="Weight")
    plt.plot(xx,yy)
    plt.show()

Example #39
from os.path import join, dirname
import time
import joblib
import pycrfsuite
from sklearn_crfsuite import metrics

from load_data import load_dataset


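# Load the fitted feature transformer and the trained CRF tagger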
transformer = joblib.load(join(dirname(__file__), "model", "transformer.bin"))
path = join(dirname(__file__), "model", "model.bin")
estimator = pycrfsuite.Tagger()
estimator.open(path)

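# Evaluate the tagger on the VLSP 2016 test corpus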
test_set = load_dataset(join(dirname(dirname(dirname(__file__))), "data", "vlsp2016", "corpus", "test.txt"))
X_test, y_test = transformer.transform(test_set)
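# Tag every test sentence and measure the wall-clock inference time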
start = time.time()
y_pred = [estimator.tag(x) for x in X_test]
end = time.time()
test_time = end - start
f1_test_score = metrics.flat_f1_score(y_test, y_pred, average='weighted')
print("F1 score: ", f1_test_score)
print("Test time: ", test_time)
with open("report.txt", "w") as f:
    f.write("F1 score: " + str(f1_test_score) + "\n" + "Test time: " + str(test_time))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="model name", choices=['cifar', 'lenet'])
    parser.add_argument("model_file", help="model file")
    parser.add_argument('layer', help='layer name to get image output')
    parser.add_argument('imageID', help='ID of image for input', type=int)
    parser.add_argument('-d', '--dataset', choices=['train', 'val', 'test'], default='test')
    parser.add_argument('--no-separate', help='do not split the data', action='store_true')
    parser.add_argument('--first-part', help='take first part of data instead of the second', action='store_true')
    parser.add_argument('-i', '--input', help='only get input image', action='store_true')
    parser.add_argument('-w', '--draw-weights', help='only draw the layer weights', action='store_true')

    args = parser.parse_args()

    model = args.model
    batch_size = 1
    separate = not args.no_separate
    model_file = args.model_file
    layer_name = args.layer
    chosen_set = args.dataset
    load_first_part = args.first_part
    imageID = args.imageID
    only_input = args.input
    only_weights = args.draw_weights
    if not only_weights:
        filename = str(imageID) + '_' + model + '_' + layer_name + '_output.png'
    else:
        filename = 'weight_' + model + '_' + layer_name + '_output.png'
    print('--Parameters--')
    print('  model         : ', model)
    print('  layer name    : ', layer_name)
    print('  batch_size    : ', batch_size)
    print('  model_file    : ', model_file)
    print('  middle output images will be saved to : ', filename)
    print('  separate data :', separate)
    if separate:
        print('    take first or second part of data :', 'first' if load_first_part else 'second')
    print('batch_size=', batch_size)

    if separate:
        nOutput = 5
    else:
        nOutput = 10

    # Load the dataset
    print("Loading data...")
    if not only_weights:
        if only_input:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part,
                                                                                    substract_mean=False)
        else:
            X_train, y_train, X_val, y_val, X_test, y_test = load_data.load_dataset(model, separate, load_first_part)

        print(len(X_train), 'train images')
        print(len(X_val), 'val images')
        print(len(X_test), 'test images')

        print('getting from ' + chosen_set)
        if chosen_set == 'train':
            X_set = X_train
            y_set = y_train
        elif chosen_set == 'val':
            X_set = X_val
            y_set = y_val
        else:
            X_set = X_test
            y_set = y_test

        if only_input:
            image_data = X_set[imageID]
            if model == 'cifar':
                image_data = image_data.reshape((3, 32, 32))
                image_data = np.rollaxis(image_data, 0, 3) # 3 32 32 to 32 32 3
            else:
                image_data = image_data.reshape((28, 28))
            image_data *= 255
            image_data = image_data.astype('uint8')
            image = Image.fromarray(image_data)
            image.save(filename)
            print('image saved to :', filename)
            exit()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    net, net_output = model_io.load_model(model, model_file, nOutput, input_var)

    if not only_weights:
        print("Getting middle output...")

        output = lasagne.layers.get_output(net[layer_name])
        get_output_image = theano.function([input_var], output.flatten(3))

        output_shape = np.array(lasagne.layers.get_output_shape(net[layer_name]))
        foo, nKernel, h, w = output_shape
        print('layer ' + layer_name + ' shape :', output_shape)

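        # Forward the chosen image through the network and keep that layer's feature maps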
        batch_output = get_output_image(np.array([X_set[imageID]]))
        images_output = batch_output[0]
        prediction = lasagne.layers.get_output(net_output)

        get_pred = theano.function([input_var], prediction)
        pred = get_pred(np.array([X_set[imageID]]))
    else:
        if model == 'cifar':
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 3
            flatten_w = net[layer_name].W.flatten(3)
            images_output = flatten_w.eval()
            images_output = np.rollaxis(images_output, 1, 0)  # nKernel 3 w*h to 3 nKernel w*h
            print('flatten weights shape :', images_output.shape)
        else:
            weights = net[layer_name].W.get_value()
            print('weights shape :', weights.shape)
            nKernel, foo, h, w = weights.shape
            assert foo == 1
            flatten_w = net[layer_name].W.flatten(2)
            images_output = flatten_w.eval()
            print('flatten weights shape :', images_output.shape)



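    # Find the smallest square grid that can hold all kernels / feature maps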
    width = 1
    while width * width < nKernel:
        width += 1

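    # Pad with blank (zero) tiles so the square grid is completely filled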
    if width * width > nKernel:
        if images_output.ndim == 2:
            images_output = np.concatenate((images_output, np.zeros((width * width - nKernel, w * h))), axis=0)
        elif images_output.ndim == 3:
            images_output = np.concatenate((images_output, np.zeros((3, width * width - nKernel, w * h))), axis=1)
        else:
            assert False

    image = Image.fromarray(tile_raster_images(
        X=images_output,  # the chosen image's feature maps, or the flattened kernels
        img_shape=(h, w), tile_shape=(width, width),
        tile_spacing=(1, 1)))
    image.save(filename)
    print('image saved to :', filename)