def BPNet(file_name):
    rengong_filename = r'C:\Users\Administrator\Desktop\yanshan_rengong1.tif'
    P = []
    T = []
    # Impervious-surface ("butoushui") training samples; the randomly sampled
    # pixels are then replaced by a pre-saved .mat cache.
    butoushui_P = LoadData(1, file_name, rengong_filename)
    butoushui_P = RD.sample(butoushui_P, 2000)
    butoushui_P = sio.loadmat('../JadeLibSVM/' +
                              'butoushui_P.mat')['butoushui_P']
    M = len(butoushui_P)
    P = butoushui_P.tolist()
    T = [1] * M
    print(M)

    # Pervious-surface ("toushui") training samples, likewise from a .mat cache.
    toushui_P = LoadData(0, file_name, rengong_filename)
    toushui_P = RD.sample(toushui_P, 2000)
    toushui_P = sio.loadmat('../JadeLibSVM/' + 'toushui_P.mat')['toushui_P']
    M = len(toushui_P)
    P.extend(toushui_P)
    T.extend([0] * M)
    print(M)

    nn = NeuralNetwork([3, 2, 1], 'tanh')
    nn.fit(P, T, 0.01, 5000)
    print('************** training finished ****************')
    p_test = extract_Yanshan('')
    predict_label = []
    for i in p_test:
        predict_label.append(nn.predict(i)[0])
    pic = np.array(Image.open(file_name))
    X = pic.shape[0]
    Y = pic.shape[1]
    P = pic.shape[2]
    Test_data = np.zeros((X * Y, 3), dtype='double')
    k = 0
    for i in range(X):
        for j in range(Y):
            Test_data[k, 0] = pic[i, j, 0]
            Test_data[k, 1] = pic[i, j, 1]
            Test_data[k, 2] = pic[i, j, 2]
            k = k + 1
    result = np.zeros((X, Y, 3))  # RGB color image
    # predict_label is assumed to hold one score per pixel (len == X * Y)
    for k in range(X * Y):  # R component, G component, B component
        if predict_label[k] >= 0.5:
            Test_data[k, 0] = 1
            Test_data[k, 1] = 1
            Test_data[k, 2] = 1  # white
        else:
            Test_data[k, 0] = 0
            Test_data[k, 1] = 0
            Test_data[k, 2] = 0  # black
    k = 0
    for i in range(X):
        for j in range(Y):
            result[i, j, 0] = Test_data[k, 0]
            result[i, j, 1] = Test_data[k, 1]
            result[i, j, 2] = Test_data[k, 2]
            k = k + 1
    return result
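A minimal usage sketch for BPNet, assuming its helper modules (LoadData, NeuralNetwork, extract_Yanshan) are importable and that the input GeoTIFF path exists; the paths and output filename below are placeholders, not part of the original code.

# Hypothetical usage: classify an image and save the 0/1 result as an 8-bit mask.
import numpy as np
from PIL import Image

result = BPNet(r'C:\Users\Administrator\Desktop\yanshan.tif')
mask = (result[:, :, 0] * 255).astype(np.uint8)
Image.fromarray(mask).save('yanshan_mask.png')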
Example No. 2
def main():
    global domain
    global domain_distance
    global distance_file

    if not len(sys.argv) == 4:
        print(
            "python testBetaIC.py <k> <beta> <domain number> \n Domain num: \n 0 : accident, 1: sanitation, 2: crime, 3: adult"
        )
        return
    k = int(sys.argv[1])  #50
    beta = float(sys.argv[2])
    domain_num = int(sys.argv[3])

    domain = domain_arr[domain_num]
    domain_distance = distance_arr[domain_num]
    print(domain + " " + domain_distance)
    Ld = LoadData(domain)
    G = Ld.readFile()
    distance_file = ""
    if (os.path.isfile(domain + "_distance.txt")):
        distance_file = domain + "_distance.txt"

    print("Dataset:", domain, "K = ", k, "Distance:", domain_distance, "beta=",
          beta)
    aff_array = test_Kcenter(G, k, domain, domain_distance)
    print("\n")
    print(
        "#######################################################################\n"
    )
    bs = betaStrong(domain, G, aff_array, k, beta, domain_distance,
                    distance_file)
    aff_array = bs.beta_IC()
    calculate_composition(G, k, aff_array, domain)
    del Ld
Example No. 3
def train(FLAGS):
    # Data loading
    import pickle as pk
    data = DATA.LoadData(FLAGS.path, FLAGS.dataset)

    if FLAGS.verbose > 0:
        print(
            "FM: dataset=%s, embedding_size=%d,#epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, keep=%s, metric=%s, optimizer=%s, batch_norm=%d"
            % (FLAGS.dataset, FLAGS.embedding_size, FLAGS.epoch,
               FLAGS.batch_size, FLAGS.lr, FLAGS.lamda, FLAGS.keep,
               FLAGS.metric, FLAGS.optimizer, FLAGS.batch_norm))

    # Training
    t1 = time()
    model = FM(data.features_M, FLAGS.pretrain, make_save_file(FLAGS),
               FLAGS.embedding_size, FLAGS.valid_dimen, FLAGS.epoch,
               FLAGS.metric, FLAGS.batch_size, FLAGS.lr, FLAGS.lamda,
               FLAGS.keep, FLAGS.optimizer, FLAGS.batch_norm, FLAGS.verbose)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    # Find the best validation result across iterations
    best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s]" %
          (best_epoch + 1, model.train_rmse[best_epoch],
           model.valid_rmse[best_epoch], time() - t1))
Example No. 4
def train(args):
    # Data loading
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("AFM: dataset=%s, factors=%s, attention=%d, freeze_fm=%d, #epoch=%d, batch=%d, lr=%.4f, lambda_attention=%.1e, keep=%s, optimizer=%s, batch_norm=%d, decay=%f, activation=%s"
              %(args.dataset, args.hidden_factor, args.attention, args.freeze_fm, args.epoch, args.batch_size, args.lr, args.lamda_attention, args.keep, args.optimizer, 
              args.batch_norm, args.decay, args.activation))
    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity
    
    save_file = make_save_file(args)
    # Training
    t1 = time()

    num_variable = data.truncate_features()
    if args.mla:
        args.freeze_fm = 1
    model = AFM(data.features_M, args.pretrain, save_file, args.attention, eval(args.hidden_factor), args.valid_dimen, 
        activation_function, num_variable, args.freeze_fm, args.epoch, args.batch_size, args.lr, args.lamda_attention, eval(args.keep), args.optimizer, 
        args.batch_norm, args.decay, args.verbose, args.mla)
    
    model.train(data.Train_data, data.Validation_data, data.Test_data)
    
    # Find the best validation result across iterations
    best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print ("Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s]" 
           %(best_epoch+1, model.train_rmse[best_epoch], model.valid_rmse[best_epoch], time()-t1))
Example No. 5
def train(args):
    # Data loading
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print(
            "FM: dataset=%s, factors=%d, #epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, keep=%.2f, optimizer=%s, batch_norm=%d"
            %
            (args.dataset, args.hidden_factor, args.epoch, args.batch_size,
             args.lr, args.lamda, args.keep, args.optimizer, args.batch_norm))

    # Training
    t1 = time()
    model = FM(data.features_M, args.pretrain, make_save_file(args),
               args.hidden_factor, args.epoch, args.batch_size, args.lr,
               args.lamda, args.keep, args.optimizer, args.batch_norm,
               args.verbose, args.mla)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    # Find the best validation result across iterations
    best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print(
        "Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s], test = %.4f [%.1f s]"
        % (best_epoch + 1, model.train_rmse[best_epoch],
           model.valid_rmse[best_epoch], model.test_rmse[best_epoch],
           time() - t1))
Example No. 6
 def getLogisticRegression(self):
     """Fit a logistic regression model to the training data."""
     
     ld=LoadData(self.argumentsDict)
     X,y=ld.loadTrainingDataSet()
     tm=TrainModels(X,y)
     model=tm.getModelLogistic()
     return model,ld
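TrainModels itself is not shown in these examples; a minimal sketch of what its getModelLogistic and getDummy helpers might look like, assuming scikit-learn, purely for illustration and not the original implementation:

# Hypothetical TrainModels wrapper (assumed API, scikit-learn based).
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier


class TrainModels:
    def __init__(self, X, y):
        self.X, self.y = X, y

    def getModelLogistic(self):
        # Fit and return a logistic-regression classifier.
        return LogisticRegression(max_iter=1000).fit(self.X, self.y)

    def getDummy(self):
        # Fit and return a majority-class baseline.
        return DummyClassifier(strategy='most_frequent').fit(self.X, self.y)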
Example No. 7
 def getDummy(self):
     """Fit a dummy baseline model to the training data."""
     
     ld=LoadData(self.argumentsDict)
     X,y=ld.loadTrainingDataSet()
     tm=TrainModels(X,y)
     model=tm.getDummy()
     return model,ld
Example No. 8
    def test(self):
        load_test = LoadData()
        self.folder_test = load_test.data_test
        self.train_generator, self.x_train, self.x_valid, self.y_train, self.y_valid = load_test.loadDataTrain(
        )
        self.test_generator, self.x_test = load_test.loadDataTest(
            self.folder_test)

        model = load_model('./model.h5')

        self.test_generator.reset()
        pred = model.predict_generator(self.test_generator,
                                       verbose=1,
                                       steps=600 / 1)

        predicted_class_indices = np.argmax(pred, axis=1)

        labels = (self.train_generator.class_indices)
        labels = dict((v, k) for k, v in labels.items())
        prediksi = [labels[k] for k in predicted_class_indices]
        path = self.test_generator.filenames

        filenames = []
        for x in range(len(path)):
            filenames.append(path[x][12:len(path[x]) - 8])

        true_pred = 0
        compare = []
        for x in range(len(filenames)):
            # A prediction is assumed correct when it matches the label
            # parsed from the file name.
            if filenames[x] == prediksi[x]:
                true_pred = true_pred + 1
                compare.append("True")
            else:
                compare.append("False")

        row = len(self.test_generator)

        list_prediksi = []
        for i in range(row):
            list_prediksi.append([filenames[i], prediksi[i], compare[i]])

        # print result to console
        # s = ""
        # for i in range(row):
        #     print(i, list_prediksi[i])
        s = ''.join(prediksi[0:row])
        # print(s)

        self.progressBar.setValue(100)

        self.txtLR.setText(s)

        persentase = (true_pred / len(filenames)) * 100
        # print(persentase)

        self.lblHasil.setText("Tingkat Akurasi : %.2f%%" % (persentase))  # "Accuracy rate"
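In recent tf.keras releases, Model.predict_generator is deprecated and Model.predict accepts the same generator directly; a minimal equivalent of the prediction call above, assuming TensorFlow 2.x:

# Equivalent prediction on TF 2.x Keras, where predict() accepts a generator.
pred = model.predict(self.test_generator, verbose=1, steps=600)
predicted_class_indices = np.argmax(pred, axis=1)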
Example No. 9
    def processTestSVM(self):
        print('Loading data ...')
        # load the data
        self.dL = Ld.LoadData()
        #self.trainData = self.dL.loadCsvData('tmp/trainDataSetByT.csv')
        self.testData = self.dL.loadCsvData('tmp/testDataSetByT.csv')

        print('testing ...')
        self.testBySvm(self.testData)
Example No. 10
    def processTrainBayes(self):
        print('Loading data ...')
        # load the data
        self.dL = Ld.LoadData()
        self.trainData = self.dL.loadCsvData('tmp/trainDataSetByT.csv')
        #self.testData = self.dL.loadCsvData('tmp/testDataSetByT.csv')

        print('training ...')
        self.trainByBayes(self.trainData)
Example No. 11
def evaluate(args):
    # load test data
    data = DATA.LoadData(args.path).Test_data
    save_file = make_save_file(args)

    # load the graph
    weight_saver = tf.train.import_meta_graph(save_file + '.meta')
    pretrain_graph = tf.get_default_graph()

    # load tensors
    feature_embeddings = pretrain_graph.get_tensor_by_name(
        'feature_embeddings:0')
    nonzero_embeddings = pretrain_graph.get_tensor_by_name(
        'nonzero_embeddings:0')
    feature_bias = pretrain_graph.get_tensor_by_name('feature_bias:0')
    bias = pretrain_graph.get_tensor_by_name('bias:0')
    fm = pretrain_graph.get_tensor_by_name('fm:0')
    fm_out = pretrain_graph.get_tensor_by_name('fm_out:0')
    out = pretrain_graph.get_tensor_by_name('out:0')
    train_features = pretrain_graph.get_tensor_by_name('train_features_fm:0')
    train_labels = pretrain_graph.get_tensor_by_name('train_labels_fm:0')
    dropout_keep = pretrain_graph.get_tensor_by_name('dropout_keep_fm:0')
    train_phase = pretrain_graph.get_tensor_by_name('train_phase_fm:0')

    # restore session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    weight_saver.restore(sess, save_file)

    # start evaluation
    num_example = len(data['Y'])
    feed_dict = {
        train_features: data['X'],
        train_labels: [[y] for y in data['Y']],
        dropout_keep: 1.0,
        train_phase: False
    }
    ne, fe = sess.run((nonzero_embeddings, feature_embeddings),
                      feed_dict=feed_dict)
    _fm, _fm_out, predictions = sess.run((fm, fm_out, out),
                                         feed_dict=feed_dict)

    # calculate rmse
    y_pred = np.reshape(predictions, (num_example, ))
    y_true = np.reshape(data['Y'], (num_example, ))

    predictions_bounded = np.maximum(y_pred,
                                     np.ones(num_example) *
                                     min(y_true))  # bound the lower values
    predictions_bounded = np.minimum(predictions_bounded,
                                     np.ones(num_example) *
                                     max(y_true))  # bound the higher values
    RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))

    print("Test RMSE: %.4f" % (RMSE))
    logging.info("Test RMSE: %.4f" % (RMSE))
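The two-step bounding of the predictions above can be written as a single clip; an equivalent one-liner, not taken from the original code:

# Equivalent to the np.maximum / np.minimum bounding above.
predictions_bounded = np.clip(y_pred, min(y_true), max(y_true))
RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))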
Example No. 12
def train(args):
    # Data loading

    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("DeepAFM: dataset=%s, factors=%s, valid_dim=%d, #epoch=%d, batch=%d, lr=%.4f, \
            lambda_attention=%.1e, keep=%s, optimizer=%s, batch_norm=%d, decay=%f, \
            activation=%s, field_size=%d, dropout_deep=%s, deep_layers=%s"
              %(args.dataset, args.hidden_factor, args.valid_dimen,args.epoch, args.batch_size, \
                args.lr, args.lamda_attention, args.keep, args.optimizer,\
              args.batch_norm, args.decay, args.activation, args.field_size,\
              args.dropout_deep, args.deep_layers))
    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity

    save_file = make_save_file(args)
    # Training
    t1 = time()

    model = DeepAFM(data.features_M,
                    args.pretrain,
                    save_file,
                    eval(args.hidden_factor),
                    args.valid_dimen,
                    activation_function,
                    args.epoch,
                    args.batch_size,
                    args.lr,
                    args.lamda_attention,
                    eval(args.keep),
                    args.optimizer,
                    args.batch_norm,
                    args.decay,
                    args.verbose,
                    args.field_size,
                    random_seed=2016,
                    i_gpu=args.gpu,
                    dropout_deep=eval(args.dropout_deep),
                    deep_layers=eval(args.deep_layers))

    model.train(data.Train_data, data.Validation_data, data.Test_data)

    # Find the best validation result across iterations
    best_valid_score = max(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s]" %
          (best_epoch + 1, model.train_rmse[best_epoch],
           model.valid_rmse[best_epoch], time() - t1))
Example No. 13
def getDataSet(pDict, resdir):
    """Load the data set"""

    ld = LoadData(pDict)
    X_train, y_train = ld.loadTrainingDataSet()
    X_test, dbkeys = ld.loadTestDataSet()
    dictML = {
        "X_test": X_test,
        "X_train": X_train,
        "y_train": y_train,
        "resultsDirectory": resdir
    }
    return dictML, dbkeys
Example No. 14
def train(args):
    # Data loading
    load = DATA.LoadData("data")
    data = load.getdata()
    cdata = load.cdata()
    cnn_label = load.getcnn()
    if args.verbose > 0:
        print(
            "FM:   #epoch=%d, batch=%d, lr=%.4f,  optimizer=%s, batch_norm=%d"
            % (args.epoch, args.batch_size, args.lr,
               args.optimizer, args.batch_norm))


    sjnum = len(data[20])
    sdnum = len(data[21])
    model = SLR(sjnum, sdnum, args)
    model.train(cnn_label, data, cdata)
Example No. 15
def evaluate(args):
    # load test data
    data = DATA.LoadData(args.path, args.dataset).Test_data
    save_file = make_save_file(args)

    # load the graph
    weight_saver = tf.train.import_meta_graph(save_file + '.meta')
    pretrain_graph = tf.get_default_graph()
    # load tensors
    # feature_embeddings = pretrain_graph.get_tensor_by_name('feature_embeddings:0')
    # feature_bias = pretrain_graph.get_tensor_by_name('feature_bias:0')
    # bias = pretrain_graph.get_tensor_by_name('bias:0')
    # afm = pretrain_graph.get_tensor_by_name('afm:0')
    out_of_afm = pretrain_graph.get_tensor_by_name('DeepAFM_out:0')
    # placeholders for afm
    train_features_afm = pretrain_graph.get_tensor_by_name(
        'train_features_afm:0')
    train_labels_afm = pretrain_graph.get_tensor_by_name('train_labels_afm:0')
    dropout_keep_afm = pretrain_graph.get_tensor_by_name('dropout_keep_afm:0')
    train_phase_afm = pretrain_graph.get_tensor_by_name('train_phase_afm:0')
    dropout_keep_deep = pretrain_graph.get_tensor_by_name(
        'dropout_keep_deep:0')

    # restore session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    weight_saver.restore(sess, save_file)

    # start evaluation
    num_example = len(data['Y'])
    feed_dict = {
        train_features_afm: data['X'],
        train_labels_afm: [[y] for y in data['Y']],
        dropout_keep_afm: [1.0, 1.0],
        dropout_keep_deep: [1.0] * 3,
        train_phase_afm: False
    }
    predictions = sess.run((out_of_afm), feed_dict=feed_dict)

    # calculate rmse
    y_pred_afm = np.reshape(predictions, (num_example, ))
    y_true = np.reshape(data['Y'], (num_example, ))

    auc_score = roc_auc_score(y_true, y_pred_afm)
    print("Test AUC: {:.6f}".format(auc_score))
Example No. 16
def train(args):
    # Data loading
    load = DATA.LoadData(args.dataset)
    data = load.getdata()
    data_for_validation = load.data_for_validation()
    sentence_label = load.getsentence()
    if args.verbose > 0:
        print(
            "args: #batch_size=%d, epoch=%d, batch=%d, lr=%.4f,  optimizer=%s, batch_norm=%d, L2_regularization=%f"
            % (args.batch_size, args.epoch, args.batch_size, args.lr,
               args.optimizer, args.batch_norm, args.L2_regularization))
        logging.info(
            "args: #batch_size=%d, epoch=%d, batch=%d, lr=%.4f,  optimizer=%s, batch_norm=%d, L2_regularization=%f"
            % (args.batch_size, args.epoch, args.batch_size, args.lr,
               args.optimizer, args.batch_norm, args.L2_regularization))
    train_data_length = len(data[20])
    test_data_length = len(data[21])
    model = SLR(train_data_length, test_data_length, args)
    model.train(sentence_label, data, data_for_validation)
Example No. 17
def main():
    global domain
    global distance_file
    global domain_distance

    if not len(sys.argv) == 4:
        print("Input format:")
        print(
            "python testCluster.py <k> <domain number> <approach> \n Domain num: \n 0 : accident, 1: sanitation, 2: crime, 3: adult"
        )
        print(
            "Approach: \n 0 : strong-interpretability (IKC), 1: k-center, 2: Partition, 3: KC_F"
        )
        return

    k = int(sys.argv[1])  #50
    domain_num = int(sys.argv[2])
    approach = int(sys.argv[3])

    domain = domain_arr[domain_num]
    domain_distance = distance_arr[domain_num]

    Ld = LoadData(domain)
    G = Ld.readFile()

    distance_file = ""
    if (os.path.isfile(domain + "_distance.txt")):
        distance_file = domain + "_distance.txt"

    print("Dataset:", domain, "K = ", k, "Distance:", domain_distance,
          "distance file = ", distance_file)
    if (approach == 0):
        test_IKC1(G, k, domain, distance_file)
    elif approach == 1:
        test_Kcenter(G, k, domain)
    elif approach == 2:
        baseline_partition(G, k, domain, distance_file)
    else:
        raw_Interpretability(G, k, domain)

    del Ld
Example No. 18
def run():
    time.clock()
    t0 = float(time.clock())

    # load data from file, and do normalization on X.
    [trainX, trainY, testX, testY] = ld.LoadData()
    t1 = float(time.clock())
    print('Loading data from File. using time %.4f s, \n' % (t1 - t0))

    [trainX, testX] = nor.Normalization(trainX, testX)
    t2 = float(time.clock())
    print('Normalization on train & test X. using time %.4f s, \n' % (t2 - t1))

    # implementation assignments
    lr_reg = [0.001, 0.01, 0.1, 1, 10, 100]  #learning rate
    max_iter = 1000000  # max iteration
    eps = 0.001  # gradient comparing epsilon
    lmd_reg = [0, 0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10,
               100]  # regularization lambda

    # part 1, lamda = 0, different learning rate
    best_lr = run_part1(trainX,
                        trainY)  #default lr,grad_epsilon and max_iterations
    # [lr,bestloss,weight,lossCont] = HW1_part_1(trainX,trainY) #default lr,grad_epsilon and max_iterations
    t3 = float(time.clock())
    print('Part 1, lamda = 0, changing lr, using time %.4f s, \n' % (t3 - t2))

    # part2: fixed learning rate, different lamda
    max_iter = 10000
    run_part2(trainX, trainY, testX, testY, lmd_reg, best_lr, eps, max_iter)
    t4 = float(time.clock())
    print('Part 2, lr = 0.05, changing lmd, using time %.4f s, \n' % (t4 - t3))

    # part3: fixed lr, using 10-fold cross-validation
    # split training data into k parts
    max_iter = 1000
    k = 10
    run_part3(trainX, trainY, testX, testY, best_lr, eps, max_iter, lmd_reg, k)
    t5 = float(time.clock())
    print('Part 3, lr = 0.05, finding the best lmd, using time %.4f s, \n' %
          (t5 - t4))
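Note that time.clock was removed in Python 3.8; a minimal timing pattern for the same measurements on current Python, using time.perf_counter (a porting sketch, not part of the original snippet):

# Timing with time.perf_counter instead of the removed time.clock.
import time

t0 = time.perf_counter()
# ... load data, normalize, run each part ...
t1 = time.perf_counter()
print('Loading data from file took %.4f s' % (t1 - t0))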
Example No. 19
def train(args):
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("PNN: dataset=%s, factors=%s, epoch=%d, batch_size=%d, lr=%.4f, keep=%s, "
              "optimizer=%s, batch_norm=%s, decay=%f, activation=%s, use_inner=%d, D1=%d"
              % (args.dataset, args.hidden_factor, args.epoch, args.batch_size,
                 args.lr, eval(args.keep), args.optimizer, args.batch_norm,
                 args.decay, args.activation, args.use_inner, args.D1))

    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity

    save_file = make_save_file(args)

    ## training
    t1 = time()
    model = PNN(data.features_M, data.field, args.hidden_factor, args.pretrain,
                save_file, activation_function, args.epoch, args.batch_size,
                args.lr, args.optimizer, args.batch_norm, args.decay,
                eval(args.keep), args.use_inner, args.D1)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    ## find the best validation result across iterations

    best_valid_score = 0
    if model.greater_is_better:
        best_valid_score = max(model.valid_rmse)
    else:
        best_valid_score = min(model.valid_rmse)

    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)=%d\t train = %.4f, valid = %.4f [%.1f s]" %
          (best_epoch + 1, model.train_rmse[best_epoch],
           model.valid_rmse[best_epoch], time() - t1))
Example No. 20
    def __init__(self, name, train_dataset, val_dataset, w2v_model_number):

        np.random.seed(7)

        self.name = name

        timestamp = time.time()
        date_time = str(
            datetime.datetime.fromtimestamp(timestamp).strftime('%d%m%Y-%H%M'))

        self.result_folder = r'./ExperimentResults/' + date_time + '_' + self.name

        os.makedirs(self.result_folder)

        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(levelname)-8s %(message)s',
                            datefmt='%a, %d %b %Y %H:%M:%S',
                            filename=self.result_folder + '/info.log',
                            filemode='w')
        sys.excepthook = log_exceptions

        self.loaded_data = LoadData.LoadData(w2v_model_number, train_dataset,
                                             val_dataset, self.result_folder)

        # get size of state and action, and inputs
        self.state_space = [SENTENCE_LENGTH, EMBEDDING_SIZE]
        self.action_size = self.loaded_data.action_size
        self.image_vector_size = self.loaded_data.image_vector_size

        # create model for actor network
        self.model = Models.build_actor_model(self.state_space,
                                              self.action_size,
                                              self.image_vector_size)

        with open(self.result_folder + '/model_architecture.txt', 'w') as fh:
            self.model.summary(print_fn=lambda x: fh.write(x + '\n'))
        with open(self.result_folder + '/model_config.bin', 'wb') as fh:
            pickle.dump(self.model.get_config(), fh, protocol=2)
Example No. 21
def main():

    args = Parser.parse_command_line()
    
    samples,outdir = ld.LoadData(**args)
    dataset = ld.GetData(samples,outdir,**args)    
    
    # Creation of a directory for trained pickle file
    pkldir = join(outdir,'trained_pickle')
    if not os.path.isdir(pkldir):
        os.system('mkdir -p %s' % pkldir)
    else:
        pass
    
    if args['pkl_file'] is not None:
        tag = args['pkl_file'].replace('%s/trained' % pkldir,'').replace('.pkl','')
        ml = MLA.MLA(tag, dataset, **args)
        clf = joblib.load(args['pkl_file'])
        print("\nTrained forest is loaded from %s" % (args['pkl_file']))
        print(clf)
    else:
        ml,clf = training_go_or_stop(dataset,pkldir,**args)

    evaluation_go_or_stop(ml,clf,samples,outdir,**args)
Example No. 22
    '''
    This main function is specific to the experiments performed in the paper.
    '''

    args = parse_args()

    if os.path.isdir('logDir'):
        shutil.rmtree('logDir/')

    items = build_itemlist('training/ratings.txt', 'testing/ratings.txt', 'error_imgs.txt',  args.path) # ordered list of image tweets is created

    users_discard = [u.lower() for u in
                     ['ConnorRyan90', 'grierrxnash', 'austio311', 'Nodays_off__', 'cutiestylespie', 'eazzz_e',
                      'eminemlights', 'Im_Wierdd1027', 'shay1498', 'urchkin', 'miley23isHot']] # for these users the tweet history was not long enough for accurate personality extraction

    users_filtered = build_userlist('users/traits.csv', users_discard,  args.path) #  the 862 users in the dataset
    users_all = build_userlist('users/traits.csv', [],  args.path) # all the users in the csv

    data = DATA.LoadData(args.path, args.dataset, items, users_filtered, args.item_ft, args.user_ft, users_all) # instance of class LoadData

    if args.verbose > 0:
        print("FM: dataset=%s, factors=%d, num_epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, optimizer=%s"
              % (args.dataset, args.hidden_factor, args.epoch, args.batch_size, args.lr, args.lamda,
                 args.optimizer))

    model = FM(data.features_M, args.hidden_factor, args.epoch,
               args.batch_size, args.lr, args.lamda, args.optimizer, args.verbose, args.path, len(users_filtered),
               len(items), args.item_ft, args.user_ft, args.keep_prob, args.batch_norm)

    model.train(data.Train_data, data.Test_data)
Example No. 23
def evaluate(args):
    # load test data
    data = DATA.LoadData(args.path, args.dataset).Test_data
    save_file = make_save_file(args)

    # load the graph
    weight_saver = tf.train.import_meta_graph(save_file + '.meta')
    pretrain_graph = tf.get_default_graph()
    # load tensors
    # feature_embeddings = pretrain_graph.get_tensor_by_name('feature_embeddings:0')
    feature_bias = pretrain_graph.get_tensor_by_name('feature_bias:0')
    bias = pretrain_graph.get_tensor_by_name('bias:0')
    afm = pretrain_graph.get_tensor_by_name('afm:0')
    out_of_afm = pretrain_graph.get_tensor_by_name('out_afm:0')
    interactions = pretrain_graph.get_tensor_by_name('interactions:0')
    attention_out = pretrain_graph.get_tensor_by_name('attention_out:0')
    # placeholders for afm
    train_features_afm = pretrain_graph.get_tensor_by_name(
        'train_features_afm:0')
    train_labels_afm = pretrain_graph.get_tensor_by_name('train_labels_afm:0')
    dropout_keep_afm = pretrain_graph.get_tensor_by_name('dropout_keep_afm:0')
    train_phase_afm = pretrain_graph.get_tensor_by_name('train_phase_afm:0')

    # tensors and placeholders for fm
    if args.mla:
        out_of_fm = pretrain_graph.get_tensor_by_name('out_fm:0')
        element_wise_product = pretrain_graph.get_tensor_by_name(
            'element_wise_product:0')
        train_features_fm = pretrain_graph.get_tensor_by_name(
            'train_features_fm:0')
        train_labels_fm = pretrain_graph.get_tensor_by_name(
            'train_labels_fm:0')
        dropout_keep_fm = pretrain_graph.get_tensor_by_name(
            'dropout_keep_fm:0')
        train_phase_fm = pretrain_graph.get_tensor_by_name('train_phase_fm:0')

    # restore session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    weight_saver.restore(sess, save_file)

    # start evaluation
    num_example = len(data['Y'])
    if args.mla:
        feed_dict = {
            train_features_afm: data['X'],
            train_labels_afm: [[y] for y in data['Y']],
            dropout_keep_afm: [1.0, 1.0],
            train_phase_afm: False,
            train_features_fm: data['X'],
            train_labels_fm: [[y] for y in data['Y']],
            dropout_keep_fm: 1.0,
            train_phase_fm: False
        }
        ao, inter, out_fm, predictions = sess.run(
            (attention_out, interactions, out_of_fm, out_of_afm),
            feed_dict=feed_dict)
    else:
        feed_dict = {
            train_features_afm: data['X'],
            train_labels_afm: [[y] for y in data['Y']],
            dropout_keep_afm: [1.0, 1.0],
            train_phase_afm: False
        }
        predictions = sess.run((out_of_afm), feed_dict=feed_dict)

    # calculate rmse
    y_pred_afm = np.reshape(predictions, (num_example, ))
    y_true = np.reshape(data['Y'], (num_example, ))

    predictions_bounded = np.maximum(y_pred_afm,
                                     np.ones(num_example) *
                                     min(y_true))  # bound the lower values
    predictions_bounded = np.minimum(predictions_bounded,
                                     np.ones(num_example) *
                                     max(y_true))  # bound the higher values
    RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))

    print("Test RMSE: %.4f" % (RMSE))

    if args.mla:
        # select significant cases
        ao = np.reshape(ao, (num_example, 3))
        y_pred_fm = np.reshape(out_fm, (num_example, ))
        pred_abs_fm = abs(y_pred_fm - y_true)
        pred_abs_afm = abs(y_pred_afm - y_true)
        pred_abs = pred_abs_afm - pred_abs_fm

        ids = np.arange(0, num_example, 1)

        sorted_ids = sorted(
            ids,
            key=lambda k: pred_abs_afm[k] + abs(ao[k][0] * ao[k][1] * ao[k][2]))
        # sorted_ids = sorted(ids, key=lambda k: abs(ao[k][0]*ao[k][1]*ao[k][2]))
        for i in range(3):
            _id = sorted_ids[i]
            print('## %d: %d' % (i + 1, y_true[_id]))
            print(
                '0.33*%.2f + 0.33*%.2f + 0.33*%.2f = %.2f' %
                (inter[_id][0], inter[_id][1], inter[_id][2], y_pred_fm[_id]))
            print('%.2f*%.2f + %.2f*%.2f + %.2f*%.2f = %.2f\n'%(\
                          ao[_id][0], inter[_id][0], \
                          ao[_id][1], inter[_id][1], \
                          ao[_id][2], inter[_id][2], y_pred_afm[_id]))
Example No. 24
    np.random.seed(2019)
    random_seed = 2019
    args = parse_args()

    if args.dataset == 'lastfm':
        print('load lastfm data')
        DATA_ROOT = '../data/lastfm'
    if args.dataset == 'frappe':
        print('load frappe data')
        DATA_ROOT = '../data/frappe'
    if args.dataset == 'ml-1m':
        print('load ml-1m data')
        DATA_ROOT = '../data/ml-1m'

    f1 = open(os.path.join(DATA_ROOT, 'ENSFM.txt'), 'w')
    data = DATA.LoadData(DATA_ROOT)


    with tf.Graph().as_default():
        tf.set_random_seed(random_seed)
        session_conf = tf.ConfigProto()
        session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            deep = ENSFM(data.item_map_list,data.user_field_M,data.item_field_M, args.embed_size, data.max_positive_len,args)
            deep._build_graph()
            train_op1 = tf.train.AdagradOptimizer(learning_rate=args.lr, initial_accumulator_value=1e-8).minimize(
                deep.loss)
            sess.run(tf.global_variables_initializer())

            batch_size=args.batch_size
Example No. 25
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc


def verif(dtest_y, predictions):
    print("\naccuracy_score :", accuracy_score(dtest_y, predictions))
    a = classification_report(dtest_y, predictions)
    print("\nclassification report :\n", (a))
    return a


#    plt.figure(figsize=(13, 10))
#    plt.subplot(221)
#    sns.heatmap(confusion_matrix(dtest_y, predictions), annot=True, fmt="d", linecolor="k", linewidths=3)
#    plt.title("CONFUSION MATRIX", fontsize=20)

experiments = LoadData.LoadData()
data = experiments.build_data_frame()
k = 0.9
lag_size = 3000
window_size = 3000
baffle_alg = BAFFLE(lag_size,
                    window_size,
                    k=k,
                    alpha=0.3,
                    explained_variance_ratio=0.95)

with open('time_intervals.json', 'r') as fp:
    time_intervals_dict = json.load(fp)
features = ['h1', 'h2', 'h3']
column_names = list(set([column[0] for column in data]))
column_names.sort()
Example No. 26
        #Y=[[1]]
        all_items = data.binded_items.values()
        #true_item_id=data.binded_items[item]
        #user_feature_embeddings = tf.nn.embedding_lookup(self.weights['user_feature_embeddings'],X_user)
        for itemID in xrange(len(all_items)):
            X_user.append(user_feature)
            item_feature=[int(feature) for feature in data.item_map[itemID].strip().split('-')[0:]]
            X_item.append(item_feature)
        feed_dict = {self.user_features: X_user, self.positive_features: X_item,self.train_phase: False, self.dropout_keep: 1.0}
        scores=self.sess.run((self.positive),feed_dict=feed_dict)
        scores=scores.reshape(len(all_items))
        return scores

if __name__ == '__main__':
    # Data loading
    args = parse_args()
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print( "FM: dataset=%s, factors=%d,  #epoch=%d, batch=%d, lr=%.4f, lambda=%.1e,optimizer=%s, batch_norm=%d, keep=%.2f"
                % (args.dataset, args.hidden_factor, args.epoch, args.batch_size, args.lr, args.lamda, args.optimizer, args.batch_norm, args.keep_prob))

    save_file = '../pretrain/%s_%d' % (args.dataset, args.hidden_factor)
    # Training
    t1 = time()
    model = FM(data.user_field_M, data.item_field_M, args.pretrain, save_file, args.hidden_factor, args.loss_type, args.epoch,
               args.batch_size, args.lr, args.lamda,  args.keep_prob, args.optimizer, args.batch_norm, args.verbose)
    #model.test()
    model.train(data.Train_data)
    #model.test()
    model.evaluate()
Example No. 27
from MATRIX import *

day = 24 * 60 * 60.
year = 3600. * 24. * 365.25  # seconds in a Julian year
week = 3600. * 24. * 7.  # seconds in a week
month = week * 4.
microarcsecond = np.pi / (180 * 3600 * 1e6)

GW_parameters = namedtuple(
    "GW_parameters",
    "logGWfrequency logAmplus logAmcross cosTheta Phi DeltaPhiPlus DeltaPhiCross"
)
GW_par = gen_rand_GW()
#GW_par = GW_parameters( logGWfrequency = np.log(2*np.pi/(3*month)), logAmplus = -12*np.log(10), logAmcross = -12*np.log(10), cosTheta = 0.5, Phi = 1.0, DeltaPhiPlus = 1*np.pi , DeltaPhiCross = np.pi )

star_positions_times_angles = LoadData(
    "MockAstrometricTimingData/gwastrometry-gaiasimu-1000-randomSphere-v2.dat")

number_of_stars = len(star_positions_times_angles)
sigma = 100 * microarcsecond / np.sqrt(1.0e9 / number_of_stars)
sigma_t = 1.667 * 1.0e-6 / np.sqrt(1.0e9 / number_of_stars)
distances = np.random.normal(3.086e16, 1.0e13,
                             len(star_positions_times_angles))


def WapperFunction_FisherMatrix(args):

    sigma = args[0]
    sigma_t = args[1]
    distances = args[2]
    d = args[3]
Example No. 28
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

BATCH_SIZE = 4
IMAGE_SIZE = 256
BUFFER_SIZE = 15000
AUTOTUNE = tf.data.experimental.AUTOTUNE
SEED = 25
N_CHANNELS = 3
N_CLASSES = 2
EPOCHS = 3

dataset = LoadData.LoadData(
    "/home/hossein/synthesisData/training/images/*.png",
    "/home/hossein/synthesisData/validation/images/*.png",
    IMAGE_SIZE,
    BATCH_SIZE,
    shuffle_buffer_size=5000,
    seed=123).get_dataset()
print(dataset['train'])
print(dataset['val'])


def display_sample(display_list):
    """Show side-by-side an input image,
    the ground truth and the prediction.
    """
    plt.figure(figsize=(18, 18))

    title = ['Input Image', 'True Mask', 'Predicted Mask']
Example No. 29
                               y_pred)  # I haven't checked the log_loss
            return logloss
'''         # for testing the classification accuracy  
            predictions_binary = [] 
            for item in y_pred:
                if item > 0.5:
                    predictions_binary.append(1.0)
                else:
                    predictions_binary.append(0.0)
            Accuracy = accuracy_score(y_true, predictions_binary)
            return Accuracy '''

if __name__ == '__main__':
    # Data loading
    args = parse_args()
    data = DATA.LoadData(args.path, args.dataset, args.loss_type)
    if args.verbose > 0:
        print(
            "Neural FM: dataset=%s, hidden_factor=%d, dropout_keep=%s, layers=%s, loss_type=%s, pretrain=%d, #epoch=%d, batch=%d, lr=%.4f, lambda=%.4f, optimizer=%s, batch_norm=%d, activation=%s, early_stop=%d"
            % (args.dataset, args.hidden_factor, args.keep_prob, args.layers,
               args.loss_type, args.pretrain, args.epoch, args.batch_size,
               args.lr, args.lamda, args.optimizer, args.batch_norm,
               args.activation, args.early_stop))
    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity
Example No. 30
        if type == 'test':
            self.resultfile.write('\n')

    print(type, ':  ', themap, thendcg[0], thendcg[2], thendcg[5], thendcg[9])


if __name__ == '__main__':

    dataset = 'MSLR-WEB10K'
    fold = 'Fold1'

    datafile = '/home/zengwei/data/' + dataset + '/' + fold + '/'

    train_data = LoadData(datafile + 'train.txt', dataset)
    vali_data = LoadData(datafile + 'vali.txt', dataset)
    test_data = LoadData(datafile + 'test.txt', dataset)

    nquery = len(train_data.keys())

    Nfeature = 136
    Learningrate = 0.01
    Nepisode = 100

    Lenepisode = 10

    Resultfile = 'ApprenticeRank/Result_' + dataset + '_' + fold + '_' + time.strftime(
        "%m%d", time.localtime())

    learner = RL(Nfeature, Learningrate, Lenepisode, Resultfile)