Example #1
    def createIntegrator(self):
        vx = Dataset('VX')
        vy = Dataset('VY')
        self.velocityIntegrator = FlowIntegrator(vx, vy)
        bx = Dataset('surfaceGradX')
        by = Dataset('surfaceGradY')
        self.surfaceIntegrator = FlowIntegrator(bx, by)
Example #2
def get_iterator(mode):
    normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]], std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    kwargs = {'num_workers': 4, 'pin_memory': True}
    transform_augment = transforms.Compose([
        # transforms.RandomResizedCrop(args.size, scale=(0.8, 1.2)),  # random scale 0.8-1 of original image area, crop to args.size
        transforms.RandomResizedCrop(size),
        transforms.RandomRotation(15),  # random rotation -15 to +15 degrees
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    transform = transforms.Compose([transforms.Resize((size, size)),
                                              transforms.ToTensor(),
                                              normalize,
                                              ])
    if mode:
        dataset = Dataset.MURA(split="train", transform=(transform_augment if augment else transform), type=type)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             **kwargs)
    else:
        dataset = Dataset.MURA(split="test", transform=transform, type=type)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             **kwargs)
    return loader
Example #3
def main():

    file = IOHelper().checkArg(sys.argv)
    if (len(file) < 1):
        print("Missing file")
        exit(1)

    d = Dataset()
    d.loadFile(file[0])

    fig, axes = plt.subplots(figsize=(18, 10))
    fig.tight_layout()

    start = 6
    width = 13

    widthStart = 0
    widthEnd = widthStart + width
    ystart = start
    for i in range(width):
        drawOneSub(d, start, ystart, range(widthStart, widthEnd))
        widthStart += width
        widthEnd += width
        start += 1
    print("")

    # plt.title(d.getName(index))

    plt.savefig('scatter_plot.png')
    plt.show()
Example #4
def loadDataset():
    dataFile = pd.read_csv("data/full_context_PeerRead.csv")
    column   = ['left_citated_text', 'right_citated_text', 'target_id', 'source_id', 'target_year', 'target_author']
    df       = dataFile[column]
    df       = cut_off_dataset(df, config.FREQUENCY)
    df       = slicing_citation_text(df, config.SEQ_LENGTH)


    trainDF, testDF                 = split_dataset(df, config.YEAR)
    trainDF, testDF, labelGenerator = get_label(df, trainDF, testDF)

    trainDF = trainDF.reset_index(drop=True)
    testDF  = testDF.reset_index(drop=True)


    trainDatatset = Dataset.BertBaseDataset(
        contextLeft=trainDF["leftSTRING"].values,  
        targetIndex = trainDF["LabelIndex"].values,
        contextRight=trainDF["rightSTRING"].values,
        isRight = config.isRight
        )

    testDatatset  = Dataset.BertBaseDataset(
        contextLeft=testDF["leftSTRING"].values,
        targetIndex = testDF["LabelIndex"].values,
        contextRight=testDF["rightSTRING"].values,
        isRight = config.isRight
        )

    return trainDatatset, testDatatset, labelGenerator
Example #5
    def __init__(self, **kwargs):

        params = set([
            'learning_rate', 'max_epochs', 'display_step', 'std_dev',
            'dataset_train', 'dataset_valid', 'dataset_test'
        ])

        # initialize all allowed keys to false
        self.__dict__.update((key, False) for key in params)
        # and update the given keys by their given values
        self.__dict__.update(
            (key, value) for key, value in kwargs.items() if key in params)

        if (self.dataset_train != False and self.dataset_valid != False):
            # Load the Training Set
            self.train_imgs_lab = Dataset.loadDataset(self.dataset_train)
            self.valid_imgs_lab = Dataset.loadDataset(self.dataset_valid)
        else:
            # Load the Test Set
            self.test_imgs_lab = Dataset.loadDataset(self.dataset_test)

        # Graph input
        self.img_pl = tf.placeholder(
            tf.float32, [None, RE_IMG_SIZE, RE_IMG_SIZE, n_channels])
        self.label_pl = tf.placeholder(tf.float32, [None, Dataset.NUM_LABELS])
        self.keep_prob = tf.placeholder(
            tf.float32)  # dropout (keep probability)
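As an aside, the kwargs-filtering idiom used above (whitelist a set of parameter names, default every allowed key to False, then overwrite with whatever the caller passed) is shown in isolation in this small hedged sketch; the class and parameter names here are made up for illustration:

class ConfigHolder:
    # toy illustration of the whitelist-and-update idiom used above
    ALLOWED = {'learning_rate', 'max_epochs', 'display_step'}

    def __init__(self, **kwargs):
        # every allowed key defaults to False, mirroring the example above
        self.__dict__.update((key, False) for key in self.ALLOWED)
        # only whitelisted keys supplied by the caller overwrite the defaults
        self.__dict__.update((key, value) for key, value in kwargs.items()
                             if key in self.ALLOWED)

cfg = ConfigHolder(learning_rate=0.01, unknown_flag=True)
print(cfg.learning_rate)              # 0.01
print(cfg.max_epochs)                 # False (not supplied)
print(hasattr(cfg, 'unknown_flag'))   # False (filtered out)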
Example #6
def main():

    train_list = Dataset.make_datapath_list._make_datapath_list("tranings")

    size = 28

    # create the DataLoaders
    train_dataset = Dataset.MyDataset(file_list=train_list,
                                      transform=Dataset.ImageTransform(size),
                                      phase='train')
    test_dataset = Dataset.testDataset(transform=Dataset.ImageTransform(size),
                                       phase='val')

    train_dataloder = torch.utils.data.DataLoader(train_dataset,
                                                  batch_size=32,
                                                  shuffle=True)
    test_dataloder = torch.utils.data.DataLoader(test_dataset,
                                                 batch_size=4,
                                                 shuffle=False)

    # collect the loaders in a dictionary
    dataloders_dict = {"train": train_dataloder, "val": test_dataloder}

    # set up the loss function
    criterion = nn.CrossEntropyLoss()

    model = Network.Net(10)
    model = model.to("cuda")
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    val_loss_list, train_loss_list, val_acc_list, train_acc_list = train_model(
        model, dataloders_dict, criterion, optimizer, num_epochs=30)
    torch.save(model.state_dict(), "mnist_cnn.pth")
Example #7
    def setDataset(self):
        try:
            self.dataset = Dataset(
                raw_input("Enter the name of the dataset to import: "))
        except:
            print "Error loading dataset. Try again!"
            self.setDataset()
Example #8
def main():
    ########### load the config file ##########
    ch = config.ConfigHandler("./config.ini")
    ch.load_config()

    ########### read parameters ##########
    train_batch_size = int(ch.config["model"]["train_batch_size"])
    test_batch_size = int(ch.config["model"]["test_batch_size"])

    num_epochs = int(ch.config["model"]["num_epochs"])
    learning_rate = float(ch.config["model"]["learning_rate"])
    class_size = int(ch.config["model"]["class_size"])

    ########### read log and model settings ##########
    log_interval = int(ch.config["log"]["log_interval"])
    version_name = ch.config["log"]["version_name"]

    train_file = ch.config["data"]["train_file"]
    test_file = ch.config["data"]["test_file"]

    ########### build the training data loader ##########
    data_train = Dataset.ImageDataset(train_file, train=True)
    data_loader_train = torch.utils.data.DataLoader(
        dataset=data_train, batch_size=train_batch_size, shuffle=True)

    ########### build the test data loader ##########
    data_test = Dataset.ImageDataset(test_file, train=False)
    data_loader_test = torch.utils.data.DataLoader(dataset=data_test,
                                                   batch_size=test_batch_size,
                                                   shuffle=False)

    ########### train and evaluate ##########
    train.train_and_test(num_epochs, learning_rate, class_size,
                         data_loader_train, data_loader_test, log_interval,
                         version_name).train_epoch()
Example #9
def TransferedLearning():
    start_time = time.time()

    model = Models.GetTrainedWithImageNetByName(GlobalVariables.MODEL_TO_USE)
    model.summary()

    model.compile(optimizer='adam',
                  loss=keras.losses.mean_squared_error,
                  metrics=['accuracy'])

    image_height, image_length, color_depth = Models.Shapes[
        GlobalVariables.MODEL_TO_USE]
    history = model.fit(
        Dataset.getTrainingDatasetGenerator(image_height, image_length,
                                            color_depth == 1),
        epochs=GlobalVariables.EPOCHS,
        validation_data=Dataset.getTestingDatasetGenerator(
            image_height, image_length, color_depth == 1),
        callbacks=[
            keras.callbacks.ModelCheckpoint(
                "D:/Trained_models/{epoch:02d}e-{accuracy:.4f}-{val_accuracy:.4f}.h5",
                monitor='val_accuracy',
                save_best_only=True,
                verbose=0),
            keras.callbacks.CSVLogger('Training_histories/Test.csv',
                                      append=True,
                                      separator=';')
        ])

    print("Training time in sec:", time.time() - start_time)
    Display.TrainingHistory(history)
Example #10
def calcStochasticGradient():

    r = 0.1
    w = [0.0, 0.0, 0.0, 0.0]
    weights = []
    weights.append([0.0, 0.0, 0.0, 0.0])
    newWeight = [0.0, 0.0, 0.0, 0.0]
    gradients = []
    trainingData = Dataset('pr2Training.csv')

    # Compute gradient
    for example in trainingData.getExampleList():
        gradient = []
        lastWeight = copy.deepcopy(newWeight)
        yi, xi = getYiXi(example)

        for index in range(0, len(w)):
            xij = xi[index]
            gradient.append(
                ((yi - np.dot(np.transpose(w), np.array(xi))) * xij))
            newWeight[index] = lastWeight[index] + r * gradient[index]
        gradients.append(gradient)
        weights.append(copy.deepcopy(newWeight))

    print "weights: "
    for weight in weights:
        print str(weight)
    print "gradient: "
    for gradient in gradients:
        print str(gradient)
Example #11
    def __init__(self, data, labels, *args, **kwargs):
        super(Player, self).__init__(*args, **kwargs)

        self.data = Dataset(data)
        self.file_list = []
        for f in self.data.get_files():
            self.file_list.append(f.split("/")[-1])

        self.labelset = LabelSet(self.file_list, labels[0], labels[1])
        self.label_colors = {}
        self.label_colors[''] = self.palette().color(QPalette.Background)
        for i in labels[0]:
            self.label_colors[i] = "blue"
        for i in labels[1]:
            self.label_colors[i] = "red"

        self.setWindowTitle("test")
        self.status = {"playing": False}
        self.image_frame = bboxCanvas(848, 480)

        self.video_timer = QTimer()
        self.video_timer.timeout.connect(self.next_frame)
        self.video_timer.setInterval(30)

        self.createLabelBar()
        self.createVideoBar()
        self.makeDock()

        label_layout = QHBoxLayout()
        self.label_list_widget = QListWidget()
        self.label_list_widget.setFlow(QListView.LeftToRight)
        self.label_list_widget.setMaximumHeight(30)
        self.activeBox = -1
        lCycleBtn = QPushButton("<<")
        lCycleBtn.clicked.connect(self.boxCycleDown)
        rCycleBtn = QPushButton(">>")
        rCycleBtn.clicked.connect(self.boxCycleUp)
        remBtn = QPushButton("remove")
        remBtn.clicked.connect(self.remActiveBox)
        label_layout.addWidget(self.label_list_widget)
        label_layout.addWidget(lCycleBtn)
        label_layout.addWidget(rCycleBtn)
        label_layout.addWidget(remBtn)

        self.image_frame.new_box_signal.connect(self.mark_box)

        mainWidget = QWidget()
        layout = QVBoxLayout()
        layout.addWidget(self.labelBar)
        layout.addLayout(label_layout)
        layout.addWidget(self.image_frame)
        layout.addWidget(self.videoBar)
        mainWidget.setLayout(layout)
        self.setCentralWidget(mainWidget)

        self.label_range = {"start": [0, 0], "end": [0, 0]}

        self.render_frame()
        self.fillLabels()
Example #12
    def __init__(self):
        self.ds = Dataset()
        self.allMethods = self.ds.getAllMethod()
        contents = []
        for method in self.allMethods:
            contents.append(method.content)
        self.tfidf = TFIDFAlg(contents)
        self.calcuatedsimi = {}
Example #13
def main():
    ########### load the config file ##########
    ch = config.ConfigHandler("./config.ini")
    ch.load_config()

    ########### read parameters ##########
    train_batch_size = int(ch.config["model"]["train_batch_size"])
    valid_batch_size = int(ch.config["model"]["valid_batch_size"])
    test_batch_size = int(ch.config["model"]["test_batch_size"])

    num_epochs = int(ch.config["model"]["num_epochs"])
    learning_rate = float(ch.config["model"]["learning_rate"])
    class_size = int(ch.config["model"]["class_size"])

    ########### read log and model settings ##########
    log_interval = int(ch.config["log"]["log_interval"])
    version_name = ch.config["log"]["version_name"]

    train_file = ch.config["data"]["train_file"]
    valid_file = ch.config["data"]["valid_file"]
    test_file = ch.config["data"]["test_file"]

    ########### prediction output ##########
    pred_file = ch.config["save"]["pred_file"]

    ########### build the training data loader ##########
    data_train = Dataset.ImageDataset(train_file, train=True)
    data_loader_train = torch.utils.data.DataLoader(
        dataset=data_train, batch_size=train_batch_size, shuffle=True)

    ########### build the validation data loader ##########
    data_valid = Dataset.ImageDataset(valid_file, train=False)
    data_loader_valid = torch.utils.data.DataLoader(
        dataset=data_valid, batch_size=valid_batch_size, shuffle=True)

    ########### build the test data loader ##########
    data_test = Dataset.ImageDataset(test_file, train=False)
    data_loader_test = torch.utils.data.DataLoader(dataset=data_test,
                                                   batch_size=test_batch_size,
                                                   shuffle=False)

    ########### train and evaluate ##########
    trainer = train.train_and_test(num_epochs, learning_rate, class_size,
                                   data_loader_train, data_loader_valid,
                                   data_loader_test, log_interval,
                                   version_name, pred_file)

    ########## start train ###########
    print("start train")
    begin_time = time()
    trainer.train_epoch()
    end_time = time()
    run_time = end_time - begin_time
    print('cost time:', run_time)

    ########## start eval ###########
    print("start test")
    trainer.test()
Example #14
    def calcFlux(self):

        self.vxInterp = Dataset('VX')
        self.vyInterp = Dataset('VY')

        trueFlux = self.calcTrueFlux()
        midFlux = self.calcMidFlux()
        sampleFlux = self.calcSampleFlux()
        naiveFlux = self.calcNaiveFlux()
Example #15
def main(argv=None):

    voc = Wordlist('./data/wordlist.txt')
    trainset = Dataset('./data/train.txt', voc, BATCH_SIZE)
    devset = Dataset('./data/train.txt', voc, BATCH_SIZE)
    trainset_label = Label('./data/train_label.txt')
    devset_label = Label('./data/train_label.txt')
    print "data loaded!"

    train(trainset,devset,trainset_label,devset_label,voc)
Example #16
def save_net_results(model_name, dataset_name):
    if dataset_name == 'mnist':
        train_images, train_labels, test_images, test_labels = datasets.mnist()
    elif dataset_name == 'shapes':
        train_images, train_labels, test_images, test_labels = datasets.shapes(
            'img/shapes')
    elif dataset_name == 'alienator':
        train_images, train_labels, test_images, test_labels = datasets.alienator(
            'img', 'train_keypoints.txt', 'test_keypoints.txt', rotated=False)
    elif dataset_name == 'alienator_custom':
        train_images, train_labels, test_images, test_labels = datasets.alienator(
            '.',
            'train_keypoints_custom.txt',
            'test_keypoints_custom.txt',
            rotated=False,
            kp_size_multiplier=30)
    elif dataset_name == 'alienator_custom_ns':
        train_images, train_labels, test_images, test_labels = datasets.alienator(
            '.',
            'train_keypoints_custom_ns.txt',
            'test_keypoints_custom_ns.txt',
            rotated=False,
            kp_size_multiplier=30)
    elif dataset_name == 'alienator2':
        train_images, train_labels, test_images, test_labels = datasets.alienator(
            'img',
            'train_keypoints2.txt',
            'test_keypoints2.txt',
            rotated=False)
    else:
        train_images, train_labels, test_images, test_labels = datasets.brown(
            dataset_name)

    train_dataset = Dataset(train_images, train_labels, size=64)
    test_dataset = Dataset(test_images,
                           test_labels,
                           mean=train_dataset.mean,
                           std=train_dataset.std,
                           size=64)

    network_desc = NetworkDesc(model_file=model_name + '.h5')
    # network_desc = NetworkDescPN(model_file=model_name + '.h5')

    batch = test_dataset.get_batch_triplets(100000)

    positives_net, negatives_net = get_positives_negatives(
        get_net_descriptors(network_desc, batch[0]),
        get_net_descriptors(network_desc, batch[1]),
        get_net_descriptors(network_desc, batch[2]))

    results_dir = 'results/{}/'.format(model_name)
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)
    np.save('{}{}.positives'.format(results_dir, dataset_name), positives_net)
    np.save('{}{}.negatives'.format(results_dir, dataset_name), negatives_net)
Example #17
def runOnServoDataset():
    data = Dataset(name="servo", directory="./datasets/")
    run(data, 'servo')

    if (False):
        X, Y = data.get_dataset()
        Y = np.array([Y])
        Y = Y.transpose()
        print("Shape X " + str(X.shape))
        print("Shape Y " + str(Y.shape))

        fun = RegressioneLineare(X, Y)
        pendec = PenaltyDecomposition(fun,
                                      x_0=np.array([X[0]]).transpose(),
                                      gamma=1.1,
                                      max_iterations=5,
                                      l0_constraint=15,
                                      tau_zero=1)
        pendec.start()

        inexact = InexactPenaltyDecomposition(fun,
                                              x_0=np.array([X[0]]).transpose(),
                                              gamma=1.1,
                                              max_iterations=5,
                                              l0_constraint=15,
                                              tau_zero=1)
        inexact.start()

        dfpd = DFPenaltyDecomposition(fun,
                                      x_0=np.array([X[0]]).transpose(),
                                      gamma=1,
                                      max_iterations=1,
                                      l0_constraint=15,
                                      tau_zero=2)

        dfpd = DFPenaltyDecomposition(fun,
                                      x_0=np.array([X[0]]).transpose(),
                                      gamma=1.1,
                                      max_iterations=3,
                                      l0_constraint=15,
                                      tau_zero=1)
        dfpd = DFPenaltyDecomposition(fun,
                                      x_0=np.array([np.ones(fun.number_of_x)
                                                    ]).transpose(),
                                      gamma=1.1,
                                      max_iterations=3,
                                      l0_constraint=15,
                                      tau_zero=1)
        dfpd = DFPenaltyDecomposition(fun,
                                      x_0=x0,
                                      gamma=1.1,
                                      max_iterations=1,
                                      l0_constraint=15,
                                      tau_zero=1)
        dfpd.start()
Example #18
    def __init__(self):
        self.d = Dataset()
        self.startMonth = self.d.monthNames[0][
            2:5] + " 20" + self.d.monthNames[0][5:]
        self.endMonth = self.d.monthNames[len(self.d.monthNames) -
                                          1][2:5] + " 20" + self.d.monthNames[
                                              len(self.d.monthNames) - 1][5:]

        self.times = []
        for i in range(len(self.d.months)):
            self.times.append(i)
Example #19
def main():
    # Limit GPU usage
    limit_gpu()

    # Project configurations
    config = Config.Config()

    # Convert numpy datasets to tfrecord datasets
    Dataset.convert_numpy_to_tfrecord(config, False)

    # Train model
    Train.train_model(config)
Example #20
    def Filter(nn, ds, predicate):
        ret = Dataset(ds.LabelCount())

        for i in range(ds.Count()):
            datum = ds.GetDatum(i)
            ground_label = ds.GetLabel(i)

            if predicate(nn,datum,ground_label):
                ret.Data.Add(Dataset.MemAccessor(datum))
                ret.Labels.Add(Dataset.MemAccessor(ground_label))

        return ret
Example #21
class Split:
    """This class is the model of a split.
    While it is very similar to a DecisionTree, the main difference is that the
    split contains the real associated left and right datasets.

    Moreover, the Split method contains also a method allowing easily to compute
    the gain of the split.

    @see DecisionTree.py
    """
    def __init__(self, is_numerical):
        self.is_numerical = is_numerical

        self.left = Dataset()
        self.right = Dataset()

        self.feature_index = None
        if is_numerical:
            self.feature_range = None
        else:
            self.feature_range = {}

        self.gain = -1

    def add_category_range( self, value ):
        self.feature_range[ value ] = True

    def set_numerical_range( self, value ):
        self.feature_range = float(value)

    def place(self, records, index):
        """Puts the records in the good side, with respect to the feature present at the given index.

        Also updates value of gini and gain
        """
        self.feature_index = index

        for r in records:
            if self.is_numerical and float(r.features[ self.feature_index ]) <= self.feature_range:
                side = self.left
            elif not self.is_numerical and r.features[ self.feature_index ] in self.feature_range:
                side = self.left
            else:
                side = self.right
            side.append( r )

        self.left.update()
        self.right.update()
        # compute gain
        self.left_gini = self.left.gini
        self.right_gini = self.right.gini
        l, r, n = self.left.size, self.right.size, float(records.size)
        self.gain = records.gini - (l/n)*self.left_gini - (r/n)*self.right_gini
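For orientation, here is a minimal, self-contained sketch of how a Split like the one above could be exercised. The Record and Dataset classes below are hypothetical stubs that expose only what Split.place relies on (iteration, append(), update(), and the size and gini attributes); they are illustrative assumptions, not the project's real Dataset.

from collections import Counter

class Record:
    # hypothetical record type: a feature list plus a class label
    def __init__(self, features, label):
        self.features = features
        self.label = label

class Dataset:
    # hypothetical stand-in exposing only what Split relies on:
    # append(), update(), iteration, and the size / gini attributes
    def __init__(self, records=None):
        self.records = list(records or [])
        self.update()

    def __iter__(self):
        return iter(self.records)

    def append(self, record):
        self.records.append(record)

    def update(self):
        self.size = len(self.records)
        counts = Counter(r.label for r in self.records)
        self.gini = (1.0 - sum((c / float(self.size)) ** 2
                               for c in counts.values())) if self.size else 0.0

# a numerical split on feature 0 with threshold 2.5
records = Dataset([Record([1.0], 'a'), Record([2.0], 'a'),
                   Record([3.0], 'b'), Record([4.0], 'b')])
split = Split(is_numerical=True)   # the Split class from the example above
split.set_numerical_range(2.5)
split.place(records, index=0)
print(split.gain)                  # 0.5: the threshold separates the classes perfectly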
Example #22
    def __init__(self, is_numerical):
        self.is_numerical = is_numerical

        self.left = Dataset()
        self.right = Dataset()

        self.feature_index = None
        if is_numerical:
            self.feature_range = None
        else:
            self.feature_range = {}

        self.gain = -1
Example #23
def main():
    img_path     = "lena.png"
    dataset_path = "/home/martin/datasets/flickr/thumbnails"
    dataset_csv  = "dataset.csv"
    block_size   = 64

    dataset = Dataset(dataset_path)
    #dataset.create()
    file_paths, rgb_means = dataset.load(dataset_csv)

    collage = Collage(img_path, file_paths, rgb_means, block_size)
    img_collage = collage.create()
    io.imsave("collage.png", img_collage)
Example #24
def create_dataset_icdar2015(img_root, gt_root, output_path):
    im_list = os.listdir(img_root)
    im_path_list = []
    gt_list = []
    for im in im_list:
        name, _ = os.path.splitext(im)
        gt_name = 'gt_' + name + '.txt'
        gt_path = os.path.join(gt_root, gt_name)
        if not os.path.exists(gt_path):
            print('Ground truth file of image {0} not exists.'.format(im))
        im_path_list.append(os.path.join(img_root, im))
        gt_list.append(gt_path)
    assert len(im_path_list) == len(gt_list)
    Dataset.create_dataset(output_path, im_path_list, gt_list)
Example #25
    def __init__(self, **kwargs):

        params = set(['learning_rate','max_epochs','display_step','dataset_training','dataset_test'])

        # initialize all allowed keys to false
        self.__dict__.update((key, False) for key in params)
        # and update the given keys by their given values
        self.__dict__.update((key, value) for key, value in kwargs.iteritems() if key in params)

        if(self.dataset_training != False):
            self.train_imgs_lab = Dataset.loadDataset(self.dataset_training)
        else:
            self.test_imgs_lab = Dataset.loadDataset(self.dataset_test)

        
        # Store layers weight & bias
        self.weights = {
            'wc1': tf.Variable(tf.random_normal([11, 11, n_channels, BATCH_SIZE], stddev=std_dev)),
            'wc2': tf.Variable(tf.random_normal([5, 5, BATCH_SIZE, BATCH_SIZE*2], stddev=std_dev)),
            'wc3': tf.Variable(tf.random_normal([3, 3, BATCH_SIZE*2, BATCH_SIZE*4], stddev=std_dev)),
            'wc4': tf.Variable(tf.random_normal([3, 3, BATCH_SIZE*4, BATCH_SIZE*4], stddev=std_dev)),
            'wc5': tf.Variable(tf.random_normal([3, 3, BATCH_SIZE*4, 256], stddev=std_dev)),

            'wd': tf.Variable(tf.random_normal([1024, 4096])),
            'wfc': tf.Variable(tf.random_normal([4096, 1024], stddev=std_dev)),

            'out': tf.Variable(tf.random_normal([1024, n_classes], stddev=std_dev))
        }

        self.biases = {
            'bc1': tf.Variable(tf.random_normal([BATCH_SIZE])),
            'bc2': tf.Variable(tf.random_normal([BATCH_SIZE*2])),
            'bc3': tf.Variable(tf.random_normal([BATCH_SIZE*4])),
            'bc4': tf.Variable(tf.random_normal([BATCH_SIZE*4])),
            'bc5': tf.Variable(tf.random_normal([256])),

            'bd': tf.Variable(tf.random_normal([4096])),
            'bfc': tf.Variable(tf.random_normal([1024])),

            'out': tf.Variable(tf.random_normal([n_classes]))
        }

        # Graph input
        self.img_pl = tf.placeholder(tf.float32, [None, n_input, n_channels])
        self.label_pl = tf.placeholder(tf.float32, [None, n_classes])
        self.keep_prob_in = tf.placeholder(tf.float32)
        self.keep_prob_hid = tf.placeholder(tf.float32)
        
        # Create a saver for writing training checkpoints.
        self.saver = tf.train.Saver()
Example #26
def create_specialized_csv(target_type,
                           train_samples,
                           test_samples,
                           keep_existing_cache,
                           data_folder='./data',
                           use_complete_dataset=True):
    train_csv_file = data_folder + '/' + target_type + '_train_' + str(
        train_samples) + '.csv'
    test_csv_file = data_folder + '/' + target_type + '_test_' + str(
        test_samples) + '.csv'

    if keep_existing_cache and os.path.isfile(
            train_csv_file) and os.path.isfile(test_csv_file):
        # If csv files already exist, then keep them
        # TODO: Check content size of files to make sure we have the same amount of samples
        return train_csv_file, test_csv_file

    ds = pd.read_csv('stage_1_train_nice.csv')
    if test_samples <= 1:
        test_samples = math.floor(ds[ds[target_type] == 1].shape[0] *
                                  test_samples)

    if train_samples <= 1:
        if use_complete_dataset:
            # Use the complete dataset. Copy the dataset which is smaller
            if ds[ds[target_type] != 1].shape[0] > ds[ds[target_type] ==
                                                      1].shape[0]:
                total_examples = ds[ds[target_type] != 1].shape[0]
            else:
                total_examples = ds[ds[target_type] == 1].shape[0]
            train_samples = math.floor(total_examples - test_samples)
        else:
            total_examples = ds[ds[target_type] == 1].shape[0]
            train_samples = math.floor(total_examples * train_samples)

    dataset = ds[ds[target_type] == 1].sample(test_samples)
    ds = ds.drop(dataset.index)
    none_ds = ds[ds[target_type] == 0].sample(test_samples)
    ds = ds.drop(none_ds.index)
    test_ds = pd.concat([dataset, none_ds]).sample(frac=1)
    test_ds.to_csv(test_csv_file, index=None, header=True)

    dataset = ds[ds[target_type] == 1].sample(train_samples, replace=True)
    ds = ds.drop(dataset.index)
    none_ds = ds[ds[target_type] == 0].sample(train_samples, replace=True)
    ds = ds.drop(none_ds.index)
    train_ds = pd.concat([dataset, none_ds]).sample(frac=1)
    train_ds.to_csv(train_csv_file, index=None, header=True)

    return train_csv_file, test_csv_file
Example #27
def main():

    parser = argparse.ArgumentParser(description='A convolutional neural network for image recognition')
    subparsers = parser.add_subparsers()

    common_args = [
        (['-lr', '--learning-rate'], {'help':'learning rate', 'type':float, 'default':0.05}),
        (['-e', '--epochs'], {'help':'epochs', 'type':int, 'default':2}),
        (['-ds', '--display-step'], {'help':'display step', 'type':int, 'default':10}),
        (['-sd', '--std-dev'], {'help':'std-dev', 'type':float, 'default':0.1}),
        (['-d', '--dataset'],  {'help':'dataset file', 'type':str, 'default':'test_dataset.p'})
    ]

    parser_train = subparsers.add_parser('train')
    parser_train.set_defaults(which='train')
    for arg in common_args:
        parser_train.add_argument(*arg[0], **arg[1])

    parser_preprocess = subparsers.add_parser('preprocessing')
    parser_preprocess.set_defaults(which='preprocessing')
    parser_preprocess.add_argument('-f', '--file', help='output file', type=str, default='images_dataset.p')
    parser_preprocess.add_argument('-s', '--shuffle', help='shuffle dataset', action='store_true')
    parser_preprocess.set_defaults(shuffle=False)

    parser_predict = subparsers.add_parser('predict')
    parser_predict.set_defaults(which='predict')
    for arg in common_args:
        parser_predict.add_argument(*arg[0], **arg[1])

    args = parser.parse_args()
    if args.which == 'train':
        log.basicConfig(filename='FileLog.log', level=log.INFO, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', filemode="w")

    if args.which in ('train', 'predict'):
        # create the object ConvNet
        conv_net = ConvNet(args.learning_rate, args.epochs, args.display_step, args.std_dev, args.dataset)
        if args.which == 'train':
            # TRAINING
            log.info('Start training')
            conv_net.training()
        else:
            # PREDICTION
            conv_net.prediction()
    elif args.which == 'preprocessing':
#        if args.shuffle:
#            shuffle(args.file)
#        else:
        Dataset.saveDataset(IMAGE_DIR, args.file)
Example #28
def train(model_name, restore=True):
    import_lib()
    global config, logger
    config = Config.config
    dataset = Dataset.Dataset()
    dataset.prepare_dataset()
    logger = utils.get_logger(model_name)

    model = PHVM.PHVM(len(dataset.vocab.id2featCate),
                      len(dataset.vocab.id2featVal),
                      len(dataset.vocab.id2word),
                      len(dataset.vocab.id2category),
                      key_wordvec=None,
                      val_wordvec=None,
                      tgt_wordvec=dataset.vocab.id2vec,
                      type_vocab_size=len(dataset.vocab.id2type))
    init = {'epoch': 0, 'worse_step': 0}
    if restore:
        init['epoch'], init['worse_step'], model = model_utils.restore_model(
            model,
            config.checkpoint_dir + "/" + model_name + config.tmp_model_dir,
            config.checkpoint_dir + "/" + model_name + config.best_model_dir)
    config.check_ckpt(model_name)
    summary = tf.summary.FileWriter(config.summary_dir, model.graph)
    _train(model_name, model, dataset, summary, init)
    logger.info("finish training {}".format(model_name))
Example #29
def Information_gain(dataset, node):
    info_gain = None

    print "Information gain heurisitic"

    entropy_set = Entropy_Set(dataset)

    neg_dict, pos_dict = Dataset.split_dataset(dataset, node)

    entropy_members = Entropy_Members(dataset,neg_dict,pos_dict,node)

    if entropy_set == 'F':
        info_gain = 0
        print "Info gain for all negative examples", info_gain
        return (info_gain,'NA',{},{})

    elif entropy_set == 'T':
        info_gain = 1
        print "Info gain for all positive examples", info_gain
        return (info_gain,'NA',{},{})

    else:
        info_gain = entropy_set - entropy_members
        info_gain = float(format(info_gain,".4f"))
        print "Info gain for ", node,": " , info_gain
        return (info_gain, node, neg_dict, pos_dict)
Example #30
def main(argv=None):
    voc = Wordlist('./data/wordlist.txt')
    testset = Dataset('./data/train.txt', voc, BATCH_SIZE)
    testset_label = Label('./data/train_label.txt')
    print "data loaded!"

    evaluate(testset, testset_label, voc)
Example #31
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    train_loss = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):

        #print(target.shape)
        coord,avg_values,A_spatial = dset.prepareGraph(data)
        #print(coord.shape,avg_values.shape,A_spatial.shape)
        data = [torch.from_numpy(np.concatenate((coord, avg_values), axis=2)).float().cuda(),
                torch.from_numpy(A_spatial).float().cuda(), False]
        #print(len(data))
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))


    train_loss = train_loss/(batch_idx+1)
    torch.save(model.state_dict(),'model_superpixel.pt')
    print(
        '\nTrain set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            train_loss, correct, len(train_loader.dataset),
            100. * correct / len(train_loader.dataset)))
    return train_loss,correct
Example #32
def main():
    args = get_args()

    if args.train:
        train(args.model_name, args.restore)
    else:
        import_lib()
        dataset = Dataset.Dataset()
        model = PHVM.PHVM(len(dataset.vocab.id2featCate),
                          len(dataset.vocab.id2featVal),
                          len(dataset.vocab.id2word),
                          len(dataset.vocab.id2category),
                          key_wordvec=None,
                          val_wordvec=None,
                          tgt_wordvec=dataset.vocab.id2vec,
                          type_vocab_size=len(dataset.vocab.id2type))

        best_checkpoint_dir = config.checkpoint_dir + "/" + args.model_name + config.best_model_dir
        tmp_checkpoint_dir = config.checkpoint_dir + "/" + args.model_name + config.tmp_model_dir
        model_utils.restore_model(model, best_checkpoint_dir,
                                  tmp_checkpoint_dir)

        dataset.prepare_dataset()
        texts = infer(model, dataset, dataset.test)
        dump(texts, config.result_dir + "/{}.json".format(args.model_name))
        utils.print_out("finish file test")
Example #33
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    batch_size = 5

    # data loading
    test_dir = './data_5_5/test'
    test_dataset = Dataset.RadarGesture(test_dir)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False)

    # model loading
    model = GestureNet().to(device)
    print(model)
    model.load_state_dict(torch.load('model.pth'))

    # test
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            print('Predicted:', predicted, 'Real:', labels)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the {} test images: {} %'.format(
        len(test_loader) * batch_size, 100 * correct / total))
Example #34
def to_dataset(out):
    '''
    converts a list (sample, genes ...) into a dataset
    '''
    all_genes = []
    samples = []
    dicts = []
    #prepare lines
    for line in out:
        samples.append(line[0])
        all_genes += line[1:]
        dic = {}
        for x in line:
            dic[x] = 1
        dicts.append(dic)
    # select unique genes
    all_genes = list(set(all_genes))
    # prepare lines
    lines = []
    for i in range(len(samples)):
        line = [samples[i]]
        for gene in all_genes:
            try:
                line.append(dicts[i][gene])
            except KeyError:
                line.append(0)
        lines.append(line)
        
    data = Dataset.Dataset()   
    data.create(all_genes,lines) 
        
    return data 
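To make the expected input shape concrete, here is a small hedged illustration in plain Python (it does not touch the project's Dataset class): each row starts with a sample name followed by the genes seen in that sample, and to_dataset builds one 0/1 presence line per sample over the union of all genes. The sorted column order below is only for reproducibility; the function itself uses an unordered set.

# hypothetical input: one row per sample, sample name first, then its genes
out = [
    ["sample1", "geneA", "geneB"],
    ["sample2", "geneB", "geneC"],
]

# essentially what to_dataset assembles before calling Dataset.create()
all_genes = sorted({g for row in out for g in row[1:]})
lines = [[row[0]] + [1 if g in row[1:] else 0 for g in all_genes]
         for row in out]

print(all_genes)   # ['geneA', 'geneB', 'geneC']
print(lines)       # [['sample1', 1, 1, 0], ['sample2', 0, 1, 1]]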
Example #35
def main():

    parser = argparse.ArgumentParser(description='A convolutional neural network for image recognition')
    subparsers = parser.add_subparsers()

    common_args = [
        (['-lr', '--learning-rate'], {'help':'learning rate', 'type':float, 'default':0.1}),
        (['-e', '--epochs'], {'help':'epochs', 'type':int, 'default':5}),
        (['-ds', '--display-step'], {'help':'display step', 'type':int, 'default':10}),
        (['-sd', '--std-dev'], {'help':'std-dev', 'type':float, 'default':1.0}),
        (['-d', '--dataset'],  {'help':'dataset file', 'type':str, 'default':'images_dataset.pkl'})
    ]

    parser_train = subparsers.add_parser('train')
    parser_train.set_defaults(which='train')
    for arg in common_args:
        parser_train.add_argument(*arg[0], **arg[1])

    parser_preprocess = subparsers.add_parser('preprocessing')
    parser_preprocess.set_defaults(which='preprocessing')
    parser_preprocess.add_argument('-f', '--file', help='output file', type=str, default='images_dataset.pkl')


    parser_predict = subparsers.add_parser('predict')
    parser_predict.set_defaults(which='predict')
    for arg in common_args:
        parser_predict.add_argument(*arg[0], **arg[1])

    args = parser.parse_args()

    log.basicConfig(filename='FileLog.log', level=log.INFO, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', filemode="w")


    if args.which in ('train', 'predict'):
        t = timeit.timeit("Dataset.loadDataset(IMAGE_DIR)", setup="from __main__ import *")
        log.info("Execution time of Dataset.loadDataset(IMAGE_DIR) (__main__) = %.4f sec" % t)

        # create the object ConvNet
        conv_net = ConvNet(args.learning_rate, args.epochs, args.display_step, args.std_dev, args.dataset)
        if args.which == 'train':
            # TRAINING
            conv_net.training()
        else:
            # PREDICTION
            conv_net.prediction()
    elif args.which == 'preprocessing':
        Dataset.saveDataset(IMAGE_DIR, args.file)
Example #36
def main():
    d = Dataset("rec.sport.hockey.txt", "rec.sport.baseball.txt", cutoff=2000)
    (Xtrain, Ytrain, Xtest, Ytest) = d.getTrainAndTestSets(0.8, seed=100)

    pC1 = getClassProb(Ytrain, -1)
    pC2 = getClassProb(Ytrain, 1)

    wordList = d.getWordList()
    w1 = [getFeatureProb(Xtrain, Ytrain, -1, wordIndex) for wordIndex in range(len(wordList))]
    aw1 = np.asarray(w1)
    w2 = [getFeatureProb(Xtrain, Ytrain, 1, wordIndex) for wordIndex in range(len(wordList))]
    aw2 = np.asarray(w2)

    trainError = computeError(Xtrain, Ytrain, pC1, pC2, aw1, aw2)
    print 'Train error rate is ' + str(trainError)
    testError = computeError(Xtest, Ytest, pC1, pC2, aw1, aw2)
    print 'Test error rate is ' + str(testError)
Example #37
def main():
    d = Dataset("rec.sport.hockey.txt", "rec.sport.baseball.txt", cutoff=200)
    (Xtrain, Ytrain, Xtest, Ytest) = d.getTrainAndTestSets(0.8, seed=100)

    lam = 100
    cols = []
    currentError = 1
    n = Xtrain.shape[1]
    dic = {}

##  i is the number of features to be added to cols
    for i in range(40):
        bestJ = 0
        bestErrorRate = 1
        for j in range(n):
            cols.append(j)     
            w = trainRidge(Xtrain[:, cols], Ytrain, lam)
            errorRate = computeError(Xtrain[:, cols], Ytrain, w)
            if errorRate < bestErrorRate:
                bestJ = j
                bestErrorRate = errorRate
##                print 'Best error rate is ' + str(bestErrorRate)
            cols.pop()
            
        if bestErrorRate >= currentError:
            break
        else:
            cols.append(bestJ)  
            dic[bestJ] = currentError - bestErrorRate
            currentError = bestErrorRate
            print 'Current error rate is ' + str(currentError)

    w = trainRidge(Xtrain[:, cols], Ytrain, lam)
    trainError = computeError(Xtrain[:, cols], Ytrain, w)
    print 'Train error rate is ' + str(trainError)
    testError = computeError(Xtest[:, cols], Ytest, w)
    print 'Test error rate is ' + str(testError)

##  find the top 10 features
    wordList = d.getWordList()
    topCols = [(key, value) for key, value in sorted(dic.iteritems(), key = lambda(k, v) : (v, k), reverse = True)]
    topCols = topCols[: 10]
    topFeatures = [wordList[index] for (index, value) in topCols]
    for f in topFeatures:
        print f
Example #38
def main():
    d = Dataset("rec.sport.hockey.txt", "rec.sport.baseball.txt", cutoff=2000)
    (Xtrain, Ytrain, Xtest, Ytest) = d.getTrainAndTestSets(0.8, seed=100)
    w = np.asmatrix([0 for elem in range(Xtrain.shape[1])])

    learningRate = 1

##  numTrial is the total number of rounds we want to go through before stopping (in case it is not converged)
##  k is to keep track of how many rounds we have been through   
    numTrial = 5
    k = 0

##  wSum is to count the sum of w in a given round
##  wAvg is to count the avg of w in a given round
    wAvg = w
    while makeError(Xtrain, Ytrain, wAvg):
        
        if k >= numTrial:
            print "No perfect hyperplane found!"
            print "Stop after " + str(numTrial) + " iterations."
            break
        k += 1
        
        for i in range(Xtrain.shape[0]):
            expected = -1
            xtrain = np.asmatrix(Xtrain[i]).T
            if w * xtrain > 0:
                expected = 1
            if expected != Ytrain[i]:
                w = w + learningRate * Ytrain[i] * Xtrain[i]
            if i == 0:
                wSum = w
            else:
                wSum += w
        wAvg = wSum / Xtrain.shape[0]

    trainError = computeError(Xtrain, Ytrain, w)
    print 'Train error rate is ' + str(trainError)
    testError = computeError(Xtest, Ytest, w)
    print 'Test error rate is ' + str(testError)
Example #39
def main():
    d = Dataset("rec.sport.hockey.txt", "rec.sport.baseball.txt", cutoff=1000)
    (Xtrain, Ytrain, Xtest, Ytest) = d.getTrainAndTestSets(0.8, seed=100)

    lam = 100
    cols = []
    currentError = 1
    n = Xtrain.shape[1]
    dic = {}

    for j in range(n):
        cols.append(j)     
        w = trainRidge(Xtrain[:, cols], Ytrain, lam)
        errorRate = computeError(Xtrain[:, cols], Ytrain, w)
        if errorRate >= currentError:
            cols.pop()
        else:
            dic[j] = currentError - errorRate
            currentError = errorRate
##          print out currentError once a while  
            if j % 10 == 0:
                print currentError

    w = trainRidge(Xtrain[:, cols], Ytrain, lam)
    trainError = computeError(Xtrain[:, cols], Ytrain, w)
    print 'Train error rate is ' + str(trainError)
    testError = computeError(Xtest[:, cols], Ytest, w)
    print 'Test error rate is ' + str(testError)

##  find the top 10 features
    wordList = d.getWordList()
    topCols = [(key, value) for key, value in sorted(dic.iteritems(), key = lambda(k, v) : (v, k), reverse = True)]
    topCols = topCols[: 10]
    topFeatures = [wordList[index] for (index, value) in topCols]
    for f in topFeatures:
        print f
Example #40
def main():
    print("This is a program to compute the min, max, mean and ")
    print('standard deviation for a set of numbers.\n')
    data = Dataset()
    while True:
        xStr = input('Enter a number (<Enter> to quit): ')
        if xStr == "":
            break
        try:
            x = float(xStr)
        except ValueError:
            print("Invalid Entry Ignored: Input was not a number")
            continue
        data.add(x)
    print('Summary of', data.size(), 'scores.')
    print('Min: ', data.min())
    print('Max: ', data.max())
    print('Mean: ', data.mean())
    print('Standard Deviation: ', data.std_deviation())
Example #41
    def __init__(self, learning_rate, max_epochs, display_step, std_dev, dataset):

        # Initialize params
        self.learning_rate=learning_rate
        self.max_epochs=max_epochs
        self.display_step=display_step
        self.std_dev=std_dev
        self.dataset = dataset
        self.gen_imgs_lab = Dataset.loadDataset(dataset)
        
        # Store layers weight & bias
        self.weights = {
            'wc1': tf.Variable(tf.random_normal([11, 11, 3, 96], stddev=std_dev)),
            'wc2': tf.Variable(tf.random_normal([5, 5, 96, 192], stddev=std_dev)),
            'wc3': tf.Variable(tf.random_normal([3, 3, 192, 384], stddev=std_dev)),
            'wc4': tf.Variable(tf.random_normal([3, 3, 384, 384], stddev=std_dev)),
            'wc5': tf.Variable(tf.random_normal([3, 3, 384, 256], stddev=std_dev)),
            
            'wd': tf.Variable(tf.random_normal([12544, 4096])),
            'wfc': tf.Variable(tf.random_normal([4096, 1024], stddev=std_dev)),
            
            'out': tf.Variable(tf.random_normal([1024, n_classes], stddev=std_dev))
        }
        
        self.biases = {
            'bc1': tf.Variable(tf.random_normal([96])),
            'bc2': tf.Variable(tf.random_normal([192])),
            'bc3': tf.Variable(tf.random_normal([384])),
            'bc4': tf.Variable(tf.random_normal([384])),
            'bc5': tf.Variable(tf.random_normal([256])),
            
            'bd': tf.Variable(tf.random_normal([4096])),
            'bfc': tf.Variable(tf.random_normal([1024])),

            'out': tf.Variable(tf.random_normal([n_classes]))
        }

        # Graph input
        self.img_pl = tf.placeholder(tf.float32, [None, n_input, n_channels])
        self.label_pl = tf.placeholder(tf.float32, [None, n_classes])
        self.keep_prob = tf.placeholder(tf.float32) # dropout (keep probability)
        
        # Create a saver for writing training checkpoints.
        self.saver = tf.train.Saver()
Example #42
def test():
    '''
    Test decision tree learning and classification
    '''
    ##Zoo Example
    zoo_attributes = ['hair','feathers','eggs', 'milk', 'airborne','aquatic', 'predator','toothed','backbone','breathes','venomous', 'fins', 'legs','tail', 'domestic','catsize']

    ds = Dataset(16,1) ##training Dataset
    ds.loadFromFile('zoo_train.data',1) ##load it from file
    tst = Dataset(16,1) ##testing Dataset
    tst.loadFromFile('zoo_test.data',1)
    attr = [i for i in range(16)] ##[0,1,2...,15]
    print "++++++++++++++++++++++++++++++++ZOO++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
    tree = DTL(ds,attr,mostFrequent(ds.targets),ds,verbose = False) ##make decision tree
    recallOnDataset(tree,tst)
    print '+++++++++++++++++++++++++++++END ZOO +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'
Example #43
datasetsOnSites={}
datasetSet={}
for line in phedexFile:
    l = line.split()
    datasetName=l[0]
    siteName=l[5]
    if not re.match(datasetPattern,datasetName):
        continue
    if re.match(".*BUNNIES.*",datasetName):  
        # get rid of T0 testing datasets
        # why are these even in DDM?
        continue
    if not re.match(siterx,siteName):
        continue
    if datasetName not in datasetSet:
        datasetObject = Dataset(datasetName)
        datasetSet[datasetName] = datasetObject
    else:
        datasetObject = datasetSet[datasetName]
    datasetObject.isDeleted = False
    datasetObject.addCurrentSite(siteName,l[6],l[7])
    datasetObject = None

# remove blacklisted datasets
blacklistFile = open(os.environ.get('MONITOR_DB')+'/datasets/blacklist.log','r')
blacklistSet = set(map(lambda x : x.split()[0], list(blacklistFile)))
removeByKey(datasetSet,blacklistSet)

for fileName in sorted(files):
    if debug>0:
        print ' Analyzing: ' + fileName
Example #44
def GrowTree(dataset, attributes, level):

    print "Constructing Decision Tree" , level
    max_gain, max_gain_attr, level = 0, None, 0

    print "Attributes ", attributes

    if not attributes:
        common_val = get_max_val(dataset.get("Class"))
        root = BTreeNode(str(common_val))
        root.left = None
        root.right = None

    else:

        if dataset.has_key('NA'):
            return BTreeNode(str(dataset.get('NA')))

        else:

            class_list = dataset.get("Class")

            tmp_negcnt, tmp_poscnt = get_count(class_list)

            if tmp_poscnt == 0:
                print "class: all negative examples"
                return BTreeNode('0')

            if tmp_negcnt == 0:
                print "class: all positive examples"
                return BTreeNode('1')


            for val in attributes:

                neg_dict, pos_dict = Dataset.split_dataset(dataset, val)

                variance_set = Heuristics.Variance_Impurity_Set(class_list)

                print "Variance Impurity set for ", val ,variance_set

                member_list = dataset.get(val)
                variance_member = Heuristics.Variance_Impurity_Members(dataset,neg_dict,pos_dict,member_list)
                print "Variance Impurity member for ",val,variance_member

                var_gain = Heuristics.gain(variance_set, variance_member)
                print "Variance Impurity gain for ",val ,var_gain

                print "Bool value - zeros" , bool([a for a in neg_dict.values() if a == []])
                print "Bool value - ones" , bool([a for a in pos_dict.values() if a == []])

                if bool([a for a in neg_dict.values() if a == []]):
                    print "Sub values empty - zero dataset"
                    common_val = get_max_val(dataset.get("Class"))
                    neg_dict = {}
                    neg_dict.update({'NA':common_val})

                elif bool([a for a in pos_dict.values() if a == []]):
                    print "Sub values empty - one dataset"
                    common_val = get_max_val(dataset.get("Class"))
                    pos_dict = {}
                    pos_dict.update({'NA':common_val})


                # keep the attribute with the highest gain seen so far
                if var_gain >= max_gain:
                    max_gain = var_gain
                    max_gain_attr = val
                    root_zero_dataset = neg_dict
                    root_one_dataset = pos_dict



            print "Maximum Information Gain: ",max_gain
            print "Node selected" , max_gain_attr
            print "Zero Dataset", root_zero_dataset
            print "One Dataset", root_one_dataset

        root = BTreeNode(max_gain_attr)

        if max_gain_attr in attributes:
            attributes.remove(max_gain_attr)

        if root != None:
            root.left = GrowTree(root_zero_dataset,attributes,level)
            root.right = GrowTree(root_one_dataset,attributes,level)


    level+= 1


    return root
                print "|",
            print temp.data,

            # leaf node in right subtree
            if(temp.right.data == '0' or temp.right.data == '1'):
                print "= 1 :",temp.right.data
                temp = None
            else:
                print "= 1 :"
                level+=1
                temp = temp.right



if __name__ == "__main__":

    attributes = []

    att_dict = Dataset.load_dataset('data_sets1/verysmall.csv')
    for itm in att_dict.keys():
        if itm != "Class":
            attributes.append(itm)

    print attributes

    node = GrowTree(att_dict, attributes, 0)
    print_tree(node)



Example #46
    def setUp(self):
        self.data = Dataset.loadSmallPickledData()
        self.net = DeNet()
Example #47
def runSmall():
    data = Dataset.loadSmallPickledData()
    net = DeNet()
    net.train(data, 2, 50, 0.1)
Example #48
    def training(self):
        # Launch the graph
        with tf.Session() as sess:
            # Construct model
            logits, prediction = self.alex_net_model(self.img_pl, self.weights, self.biases, self.keep_prob)

            # TODO check: define loss and optimizer
            # http://stackoverflow.com/questions/33922937/why-does-tensorflow-return-nan-nan-instead-of-probabilities-from-a-csv-file
            # equivalent to
            # tf.nn.softmax(...) + cross_entropy(...)
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, self.label_pl))
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)

            # Evaluate model
            correct_pred = tf.equal(tf.argmax(prediction,1), tf.argmax(self.label_pl, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

            # Initializing the variables
            init = tf.initialize_all_variables()

            # Run the Op to initialize the variables.
            sess.run(init)
            summary_writer = tf.train.SummaryWriter(CKPT_DIR, graph=sess.graph)

            log.info('Dataset created - images list and labels list')
            log.info('Now split images and labels in Training and Test set...')


            ##################################################################

            # collect imgs for test
            tests_imgs_batches = [b for i, b in enumerate(self.BatchIterator(BATCH_SIZE)) if i < 3]

            # Run for epoch
            for epoch in range(self.max_epochs):
                self.gen_imgs_lab = Dataset.loadDataset(self.dataset)
                # Loop over all batches
                #for step in range(num_batch):
                for step, elems in enumerate(self.BatchIterator(BATCH_SIZE)):
                    batch_imgs_train, batch_labels_train = elems
                    ### create itrator over batch list ###
                    #batch_imgs_train, batch_labels_train = self.BatchIterator(BATCH_SIZE)
                    # ### call next() for next batch of imgs and labels ###
                    # batch_imgs_train, batch_labels_train = iter_.next()

                    # Fit training using batch data
                    _, single_loss = sess.run([optimizer, loss], feed_dict={self.img_pl: batch_imgs_train, self.label_pl: batch_labels_train, self.keep_prob: dropout})
                    # Display logs per epoch step
                    if step % self.display_step == 0:
                        # print "Step %03d - Epoch %03d/%03d - single_loss %.7f" % (step, epoch, self.max_epochs, single_loss)
                        # log.info("Step %03d - Epoch %03d - single_loss %.7f" % (step, epoch, avg_loss/step, single_loss))
                        # Calculate training batch accuracy and batch loss
                        train_acc, train_loss = sess.run([accuracy, loss], feed_dict={self.img_pl: batch_imgs_train, self.label_pl: batch_labels_train, self.keep_prob: 1.})
                        print "Training Accuracy = " + "{:.5f}".format(train_acc)
                        log.info("Training Accuracy = " + "{:.5f}".format(train_acc))
                        print "Training Loss = " + "{:.6f}".format(train_loss)
                        log.info("Training Loss = " + "{:.6f}".format(train_loss))

            print "Optimization Finished!"

            #print "Accuracy = ", sess.run(accuracy, feed_dict={self.img_pl: batch_imgs_train, self.label_pl: batch_labels_train, self.keep_prob: 1.0})

            # Save the models to disk
            save_model_ckpt = self.saver.save(sess, MODEL_CKPT)
            print("Model saved in file %s" % save_model_ckpt)

            # Test accuracy
            for step, elems in enumerate(tests_imgs_batches):
                batch_imgs_test, batch_labels_test = elems
                
                test_acc = sess.run(accuracy, feed_dict={self.img_pl: batch_imgs_test, self.label_pl: batch_labels_test, self.keep_prob: 1.0})
                print "Test accuracy: %.5f" % (test_acc)
                log.info("Test accuracy: %.5f" % (test_acc))
Example #49
def runSmall():
    data = Dataset.loadSmallPickledData()
    net = LoopyNet()
    net.train(data, 30, 50, 0.1)
Example #50
    def setUp(self):
        # self.data = Dataset.SmallerDataset()
        self.data = Dataset.loadSmallPickledData()
        self.net = LoopyNet()
Example #51
if __name__ == "__main__":
    global heuristic
    # print sys.argv

    attributes = []
    l = int(float(sys.argv[1]))
    k = int(float(sys.argv[2]))
    trainset = sys.argv[3]
    testset = sys.argv[4]
    validset = sys.argv[5]
    toprint = sys.argv[6]
    heuristic = int(float(sys.argv[7]))

    # print "Training Set"
    train_set = Dataset.load_dataset(trainset)
    for itm in train_set.keys():
        if itm != "Class":
            attributes.append(itm)

    node = GrowTree(train_set, attributes)

    # print "Test Set"
    test_set = Dataset.load_dataset(testset)
    accuracy = accuracy_tree(node,test_set)

    print "Test set Accuracy percentage" , accuracy

    vaildation_set = Dataset.load_dataset(validset)

    print "Post Pruning accuracy " , post_pruning(node,l,k,vaildation_set)
Example #52
def runMedium():
    data = Dataset.loadMediumPickledData()
    net = LoopyNet()
    net.train(data, 30, 100, 0.1)
Example #53
def runBig():
    data = Dataset.loadPickledData()
    net = LoopyNet()
    net.train(data, 30, 100, 0.1)
Example #54
def GrowTree(dataset, attributes):
    global cnt_nonleaf_nodes,heuristic
    max_gain_attr =  None
    max_gain = 0.0
    gain = 0.0

    # print "Attributes ", attributes

    if not attributes:
        common_val = get_max_val(dataset.get("Class"))
        root = BTreeNode(str(common_val))
        root.left = None
        root.right = None

    else:

        if dataset.has_key('NA'):
            return BTreeNode(str(dataset.get('NA')))

        else:

            class_list = dataset.get("Class")
            # print class_list

            tmp_negcnt, tmp_poscnt = get_count(class_list)

            if tmp_poscnt == 0:
                print "class: all negative examples"
                # print class_list
                return BTreeNode('0')

            if tmp_negcnt == 0:
                print "class: all positive examples"
                # print class_list
                return BTreeNode('1')


            for val in attributes:

                neg_dict, pos_dict = Dataset.split_dataset(dataset, val)
                # print "Neg dict class" , neg_dict.get("Class")
                # print "Pos dict class" , pos_dict.get("Class")

                if heuristic == 0:
                    entropy_set = Heuristics.Entropy_Set(class_list)
                elif heuristic == 1:
                    variance_set = Heuristics.Variance_Impurity_Set(class_list)

                # print "Entropy set for ", val ,entropy_set

                member_list = dataset.get(val)
                if heuristic == 0:
                    entropy_member = Heuristics.Entropy_Members(dataset,neg_dict,pos_dict,member_list)
                elif heuristic == 1:
                    variance_member = Heuristics.Variance_Impurity_Members(dataset,neg_dict,pos_dict,member_list)
                # print "Entropy member for ",val,entropy_member

                if heuristic == 0:
                    gain = Heuristics.gain(entropy_set, entropy_member)
                elif heuristic == 1:
                    gain = Heuristics.gain(variance_set, variance_member)
                print "gain for ",val ,gain


                if bool([a for a in neg_dict.values() if a == []]):
                    print "Sub values empty - zero dataset"
                    common_val = get_max_val(dataset.get("Class"))
                    neg_dict = {}
                    neg_dict.update({'NA':common_val})

                elif bool([a for a in pos_dict.values() if a == []]):
                    print "Sub values empty - one dataset"
                    common_val = get_max_val(dataset.get("Class"))
                    pos_dict = {}
                    pos_dict.update({'NA':common_val})


                if gain >= max_gain:
                    max_gain = gain
                    max_gain_attr = val
                    root_zero_dataset = neg_dict
                    # print "inside max gain cal zeros ",val, neg_dict.get("Class")
                    root_one_dataset = pos_dict
                    # print "inside max gain cal ones ",val, pos_dict.get("Class")

                neg_dict = {}
                pos_dict = {}
                # print


            print "Maximum Information Gain: ",max_gain
            print "Node selected: " , max_gain_attr
            print "Zero Dataset: ", root_zero_dataset.get("Class")
            print "One Dataset: ", root_one_dataset.get("Class")

        root = BTreeNode(max_gain_attr)
        cnt_nonleaf_nodes += 1
        root.order = cnt_nonleaf_nodes
        root.subset = dataset

        if max_gain_attr in attributes:
            attributes.remove(max_gain_attr)

        if root != None:

            # if root.left:
                root.left = GrowTree(root_zero_dataset,attributes)
            # if root.right:
                root.right = GrowTree(root_one_dataset,attributes)


    return root