Example #1
def load_data():
    # load all data index
    images, labels = reader.load_data_index(TRAIN_PATH)
    # split train and validate data
    t_images, t_labels, v_images, v_labels = reader.split_train_validate_data(images, labels, VALIDATE_RATE)
    # transform train data to batch data
    t_image, t_label = reader.read_data(t_images, t_labels, IMAGE_WIDTH, IMAGE_HEIGHT, BATCH_SIZE, n_class=N_CLASS)
    # transform validate data to batch data
    v_image, v_label = reader.read_data(v_images, v_labels, IMAGE_WIDTH, IMAGE_HEIGHT, len(v_images), n_class=N_CLASS)
    return len(t_images), t_image, t_label, v_image, v_label
def train_random_forest_on_raw_data(hyperparams={}, save=True):
    a, b, c, labels = read_data()
    time_series = a + b + c
    time_series = [ts.flatten() for ts in time_series]

    avg = 0
    for i in range(20):
        l_train, l_test, label_train, label_test = train_test_split(
            time_series, labels, test_size=0.2)
        rf = RandomForestClassifier(**hyperparams)
        rf.fit(np.array(l_train), label_train)
        avg += np.mean(rf.predict(l_test) == label_test)

    print("avg pred on raw data: ", avg / 20)
    results = {'accuracy': avg / 20, 'rforest_hyperparameters': hyperparams}

    if save:
        with open('classification_results/raw_data_results.json', 'wt') as f:
            json.dump(results, f)

    rf = RandomForestClassifier(**hyperparams)
    rf.fit(time_series, labels)
    plot_feature_importance(rf, 'raw data', save=True, filtration='')

    return results
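
A minimal usage sketch, assuming the module already imports numpy/sklearn/json and defines read_data as above; the keys are ordinary RandomForestClassifier keyword arguments:

results = train_random_forest_on_raw_data(
    hyperparams={'n_estimators': 200, 'max_depth': 10},
    save=False)
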
def compute_persistence(edge_length=max_edge_length, filtration='rips'):
    if filtration not in ('rips', 'alpha'):
        raise ValueError('Please indicate filtration = "rips" or filtration = "alpha"')
    a, b, c, labels = read_data()
    time_series = a + b + c
    simplex_trees = [0] * len(time_series)

    print('Computing persistence diagrams with {} filtration...'.format(filtration))
    for i, ts in enumerate(time_series):
        if not i % 50:
            print('Computing persistence diagram {}/{}'.format(i, len(time_series)))

        if filtration == 'rips':
            cplx = gudhi.RipsComplex(points=ts, max_edge_length=edge_length)
            simplex_tree = cplx.create_simplex_tree(max_dimension=2)

        else:
            cplx = gudhi.AlphaComplex(points=ts)
            simplex_tree = cplx.create_simplex_tree()

        simplex_trees[i] = simplex_tree
        simplex_tree.persistence(persistence_dim_max=False)
        simplex_tree.write_persistence_diagram('intermediary_data/persistence_diagrams/{}'.format(i))

    return simplex_trees
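
A follow-up sketch showing one way the returned simplex trees could be summarized into fixed-size features (total persistence per homology dimension). The gudhi calls are standard, but this feature choice is an assumption rather than part of the example above:

import numpy as np

def persistence_features(simplex_trees, max_dim=1):
    # One row per series: total persistence (sum of death - birth) per dimension.
    features = []
    for st in simplex_trees:  # persistence() was already computed above
        row = []
        for dim in range(max_dim + 1):
            intervals = np.asarray(st.persistence_intervals_in_dimension(dim))
            if intervals.size == 0:
                row.append(0.0)
                continue
            finite = intervals[np.isfinite(intervals[:, 1])]
            row.append(float(np.sum(finite[:, 1] - finite[:, 0])))
        features.append(row)
    return np.array(features)
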
Example #4
def split():
    data = read_data()
    speaker = data[:, SPEAKER_COL]
    
    text = data[:, TEXT_COL]
    np.save(TEXT_FILENAME, text)
    np.save(LABELS_FILENAME, speaker)
Example #5
def training_from_flag(flags):
    """
    Training interface. 1. Read in data
                        2. Initialize network
                        3. Train network
                        4. Record flags
    :param flags: The training flags read from command line or parameter.py
    :return: None
    """
    if flags.use_cpu_only:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # # Import the data
    train_loader, test_loader = data_reader.read_data(flags)

    # Reset the boundary if normalized
    if flags.normalize_input:
        flags.geoboundary_norm = [-1, 1, -1, 1]

    print("Geometry boundary is set to:", flags.geoboundary)

    # Make Network
    print("Making network now")
    ntwk = Network(Lorentz, flags, train_loader, test_loader)

    # Training process
    print("Start training now...")
    #ntwk.pretrain()
    #ntwk.load_pretrain()
    ntwk.train()

    # Do the housekeeping: write the parameters into the folder and use pickle to save the flags object
    write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir)
def TF_IDF():
    train_data = read_train_data()
    test_data = read_test_data()
    data = read_data()
    vector_space = VectorSpaceConverter(data).get_vector_space_documents_and_tokens()
    tf = []
    idf = []
    for i in range(len(data)):
        tf.append(computeTF(vector_space[i], len(vector_space[i])))
    df = [0] * len(vector_space[0])
    for vector in vector_space:
        for i in range(len(vector)):
            if vector[i] != 0:
                df[i] += 1
    for i in range(len(data)):
        idf.append(computeIDF(len(data), vector_space[i], df[i]))
    TF_IDF = []
    for i in range(len(tf)):
        multiply = []
        for j in range(len(idf[0])):
            multiply.append(tf[i][j] * idf[i][j])
        TF_IDF.append(multiply)


    return TF_IDF
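
The helpers computeTF and computeIDF are not shown here; for reference, a self-contained sketch of the standard TF-IDF weighting that the final loop assembles (the exact contract of the original helpers may differ):

import math

def tf_idf_matrix(count_vectors):
    # count_vectors: one token-count vector per document, all of equal length.
    n_docs = len(count_vectors)
    n_terms = len(count_vectors[0])
    # Document frequency: in how many documents each term appears.
    df = [sum(1 for vec in count_vectors if vec[j] != 0) for j in range(n_terms)]
    weights = []
    for vec in count_vectors:
        total = sum(vec) or 1
        weights.append([(vec[j] / total) * math.log(n_docs / (1 + df[j]))
                        for j in range(n_terms)])
    return weights
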
Example #7
def training_from_flag(flags):
    """
    Training interface. 1. Read data 2. Initialize network 3. Train network 4. Record flags
    :param flags: The training flags read from command line or parameter.py
    :return: None
    """
    if flags.use_cpu_only:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Get the data
    train_loader, test_loader = data_reader.read_data(
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        batch_size=flags.batch_size,
        normalize_input=flags.normalize_input,
        data_dir=flags.data_dir,
        test_ratio=flags.test_ratio)
    # Reset the boundary if normalized
    if flags.normalize_input:
        flags.geoboundary_norm = [-1, 1, -1, 1]

    print("Boundary is set at:", flags.geoboundary)
    print("Making network now")

    # Make Network
    ntwk = Network(Forward, flags, train_loader, test_loader)

    # Training process
    print("Start training now...")
    ntwk.train()

    # Do the housekeeping: write the parameters into the folder and use pickle to save the flags object
    write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir)
Example #8
def main(flags):

    # initialize data reader

    # optional for what type of layer the network ends with
    if len(flags.tconv_dims) == 0:
        output_size = flags.fc_filters[-1]
    else:
        output_size = flags.tconv_dims[-1]

    features, labels, train_init_op, valid_init_op = data_reader.read_data(
        input_size=flags.input_size,
        output_size=output_size - 2 * flags.clip,
        x_range=flags.x_range,
        y_range=flags.y_range,
        cross_val=flags.cross_val,
        val_fold=flags.val_fold,
        batch_size=flags.batch_size,
        shuffle_size=flags.shuffle_size)

    # make network
    ntwk = network_maker.CnnNetwork(features,
                                    labels,
                                    utils.my_model_fn_tens,
                                    flags.batch_size,
                                    clip=flags.clip,
                                    fc_filters=flags.fc_filters,
                                    tconv_Fnums=flags.tconv_Fnums,
                                    tconv_dims=flags.tconv_dims,
                                    tconv_filters=flags.tconv_filters,
                                    n_filter=flags.n_filter,
                                    n_branch=flags.n_branch,
                                    reg_scale=flags.reg_scale,
                                    learn_rate=flags.learn_rate,
                                    decay_step=flags.decay_step,
                                    decay_rate=flags.decay_rate)
    # define hooks for monitoring training
    train_hook = network_helper.TrainValueHook(flags.verb_step,
                                               ntwk.loss,
                                               ckpt_dir=ntwk.ckpt_dir,
                                               write_summary=True)
    lr_hook = network_helper.TrainValueHook(flags.verb_step,
                                            ntwk.learn_rate,
                                            ckpt_dir=ntwk.ckpt_dir,
                                            write_summary=True,
                                            value_name='learning_rate')
    valid_hook = network_helper.ValidationHook(flags.eval_step,
                                               valid_init_op,
                                               ntwk.labels,
                                               ntwk.logits,
                                               ntwk.loss,
                                               ntwk.preconv,
                                               ntwk.preTconv,
                                               ckpt_dir=ntwk.ckpt_dir,
                                               write_summary=True)
    # train the network
    ntwk.train(train_init_op,
               flags.train_step, [train_hook, valid_hook, lr_hook],
               write_summary=True)
Example #9
def get_white_wine():
    filename = data_folder + 'wine/winequality-white.csv'
    apu = data_reader.read_data(filename)
    data = np.zeros((len(apu) - 1, len(apu[0][0].split(";"))))
    # row 0 holds the column names, so it is skipped
    for k_row in range(1, len(apu)):
        data[k_row - 1, :] = apu[k_row][0].split(";")
    return data
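
For comparison, a plain-NumPy sketch that loads the same kind of file in one call, assuming a semicolon-separated CSV with a single header row (a hypothetical helper, not the data_reader used above):

import numpy as np

def get_white_wine_np(filename):
    # Skip the header row and parse every remaining field as a float.
    return np.genfromtxt(filename, delimiter=';', skip_header=1)
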
Example #10
def predict(flags, geo2spec, data_path, save_path):
    #Clear the default graph first for resolving potential name conflicts
    tf.reset_default_graph()
    spec2geo_flag = not geo2spec  # derive spec2geo from the geo2spec flag
    ckpt_dir = os.path.join(os.path.abspath(''), 'models', flags.model_name)
    
    clip, forward_fc_filters, tconv_Fnums, tconv_dims, tconv_filters, n_filter, n_branch, \
    reg_scale, backward_fc_filters, conv1d_filters, conv_channel_list, batch_size = network_helper.get_parameters(ckpt_dir)
    print(ckpt_dir)
    # initialize data reader
    if len(tconv_dims) == 0:
        output_size = fc_filters[-1]
    else:
        output_size = tconv_dims[-1]
    features, labels, train_init_op, valid_init_op = data_reader.read_data(
        input_size=flags.input_size,
        output_size=output_size - 2 * clip,
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        cross_val=flags.cross_val,
        val_fold=flags.val_fold,
        batch_size=batch_size,
        shuffle_size=flags.shuffle_size,
        data_dir=flags.data_dir,
        normalize_input=flags.normalize_input,
        test_ratio=0.2)

    # if the input is normalized
    if flags.normalize_input:
        flags.boundary = [-1, 1, -1, 1]

    # Adjust the input of geometry and spectra given the flag
    if spec2geo_flag:
        geometry = features
        spectra, pred_init_op = read_tensor_from_test_data(data_path, batch_size)
        print("You are inferring from spectra to geometry")
    else:
        geometry, pred_init_op = read_tensor_from_test_data(data_path, batch_size)
        spectra = labels
        print("You are inferring from geometry to spectra")

    # make network
    ntwk = Tandem_network_maker.TandemCnnNetwork(
        geometry, spectra, model_maker.tandem_model, batch_size,
        clip=clip, forward_fc_filters=forward_fc_filters,
        backward_fc_filters=backward_fc_filters, reg_scale=reg_scale,
        learn_rate=flags.learn_rate, tconv_Fnums=tconv_Fnums,
        tconv_dims=tconv_dims, n_branch=n_branch,
        tconv_filters=tconv_filters, n_filter=n_filter,
        decay_step=flags.decay_step, decay_rate=flags.decay_rate,
        geoboundary=flags.geoboundary,
        conv1d_filters=conv1d_filters, conv_channel_list=conv_channel_list)

    if spec2geo_flag:
        ntwk.predict_spec2geo([train_init_op, pred_init_op], ckpt_dir=ckpt_dir,
                              model_name=flags.model_name, save_file=save_path)
    else:
        ntwk.predict_geo2spec([train_init_op, pred_init_op], ckpt_dir=ckpt_dir,
                              model_name=flags.model_name, save_file=save_path)
Example #11
def get_concrete():
    # 8 measurements plus the target
    filename = data_folder + 'concrete/Concrete_Data.txt'
    apu = data_reader.read_data(filename)
    data = np.zeros((len(apu) - 1, len(apu[0])))
    # row 0 holds the column names, so it is skipped
    for k_row in range(1, len(apu)):
        data[k_row - 1, :] = apu[k_row]
    return data
Example #12
def load_data():
    images, labels = reader.load_csv_index(TEST_PATH)
    batch_image, batch_label = reader.read_data(images,
                                                labels,
                                                IMAGE_WIDTH,
                                                IMAGE_HEIGHT,
                                                BATCH_SIZE,
                                                n_class=N_CLASS)
    return batch_image, batch_label, len(images)
def fit_and_eval_bayes_model_with_validation_data():
    X_train, y_train, X_test, y_test = read_data()
    bayes_model = ToxicCommentBayesianEstimator().fit(X_train, y_train)
    predictions = bayes_model.predict(X_test)
    toxic_correct_preds = 0
    for index, row in y_test.iterrows():
        if row["toxic"] == predictions.iloc[index]["toxic"]:
            toxic_correct_preds += 1
    print("Toxic correct predictions: %i out of %i" %
          (toxic_correct_preds, predictions.shape[0]))
Example #14
    def __init__(self, dubina=None):
        self.dubina = dubina  # dubina = maximum tree depth
        if self.dubina is not None:
            self.dubina = int(dubina)

        self.filename = sys.argv[1]
        self.filenameTrain = sys.argv[2]
        self.data = read_data(self.filename)
        self.testData = read_test_data(self.filenameTrain)
        self.testGoals = list(read_data(self.filenameTrain).values())[-1]
        self.goals = get_goals(self.data)
        self.goalName = list(self.data.keys())[-1]
        self.vrijednostiZnacajki = get_attribute_values(self.data)
        znacajke = list(self.data.keys())[:-1]  # znacajke = the feature names
        stablo = self.fit(self.data, self.data, znacajke, self.goalName)  # stablo = the fitted decision tree
        branches = list(self.get_branches(stablo))
        self.print_branches(branches)
        self.predicted = self.test(self.testData, stablo)
        self.matrica_zabune()  # matrica_zabune = build the confusion matrix
Example #15
def upload_file():
	if request.method == 'POST':
        # check if the post request has the file part
		if 'filename' not in request.files:
			flash('No file part')
			return redirect(request.url)
		file = request.files['filename']
		if file.filename == '':
			flash('No file selected for uploading')
			return redirect(request.url)
		if file and allowed_file(file.filename):

			# delete all previous entries
			for file_name in os.listdir('static/'):
				if file_name.endswith('.csv'):
					os.remove(os.path.join(app.config['UPLOAD_FOLDER'], file_name))

			# save the file
			filename = secure_filename(file.filename)
			file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
			flash('File successfully uploaded')

			# delete all previous produced images
			for file_name in os.listdir('static/'):
				if file_name.endswith('.png'):
					os.remove(os.path.join(app.config['UPLOAD_FOLDER'], file_name))

			# get correlation matrix
			headers, clear_X, _ = read_data("{}/{}".format(UPLOAD_FOLDER, filename), False, False)
			corr_mat(headers, clear_X)

			# do som analysis
			headers, clear_X, droped = read_data("{}/{}".format(UPLOAD_FOLDER, filename), True, True)
			analyze_som(headers, clear_X)

			files = os.listdir('static/')

			# return redirect('/')
			return render_template("results.html", files=files) #render results and return a listo of all static files
		else:
			flash('Allowed file type is csv')
			return redirect(request.url)
def get_data():
    data_x = reader.read_data("pickle/img_data.pickle")
    data_y = reader.read_data("pickle/img_label.pickle")

    tr_lim = int(len(data_x) * 70 / 100)

    X_train, Y_train = data_x[:tr_lim], data_y[:tr_lim]
    X_test, Y_test = data_x[tr_lim:], data_y[tr_lim:]

    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')
    return X_train, Y_train, X_test, Y_test
Example #17
def tandemmain(flags):
    # initialize data reader
    #Set the environment variable for if this is a cpu only script
    if flags.use_cpu_only:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    geometry, spectra, train_init_op, valid_init_op = data_reader.read_data(
        input_size=0,
        output_size=0,
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        cross_val=flags.cross_val,
        val_fold=flags.val_fold,
        batch_size=flags.batch_size,
        shuffle_size=flags.shuffle_size,
        data_dir=flags.data_dir,
        normalize_input=flags.normalize_input,
        test_ratio=0.1)
    # If the input is normalized, then make the boundary useless
    if flags.normalize_input:
        flags.geoboundary = [-1, 1, -1, 1]

    print("making network now")
    # make network
    ntwk = Tandem_network_maker.TandemCnnNetwork(geometry, spectra, model_maker.tandem_model, flags.batch_size,
                            clip=flags.clip, forward_fc_filters=flags.forward_fc_filters,
                            backward_fc_filters=flags.backward_fc_filters,reg_scale=flags.reg_scale,
                            learn_rate=flags.learn_rate,tconv_Fnums=flags.tconv_Fnums,
                            tconv_dims=flags.tconv_dims,n_branch=flags.n_branch,
                            tconv_filters=flags.tconv_filters, n_filter=flags.n_filter,
                            decay_step=flags.decay_step, decay_rate=flags.decay_rate,
                            geoboundary = flags.geoboundary, conv1d_filters = flags.conv1d_filters,
                            conv_channel_list = flags.conv_channel_list)
    
    print("Setting the hooks now")
    # define hooks for monitoring training
    train_loss_hook_list = []
    losses = [ntwk.loss, ntwk.mse_loss, ntwk.reg_loss, ntwk.bdy_loss, ntwk.learn_rate]
    loss_names = ["train_loss", "mse_loss", "regularizaiton_loss", "boundary_loss","Learning_rate"]
    #Forward detailed loss hooks, the training detail depend on input flag
    forward_hooks = get_hook_list(flags, ntwk, valid_init_op, losses, loss_names, "forward_", flags.detail_train_loss_forward) 
    #Assume Tandem one always show the training detailed loss
    tandem_hooks = get_hook_list(flags, ntwk, valid_init_op,  losses, loss_names, "tandem_", detail_train_loss = True)
    
    # train the network
    print("Start the training now")
    #ntwk.train(train_init_op, flags.train_step, [train_hook, valid_hook, lr_hook], write_summary=True)
    ntwk.train(train_init_op, flags.train_step, flags.backward_train_step, forward_hooks, tandem_hooks,
                write_summary=True, load_forward_ckpt = flags.forward_model_ckpt)

    #Write the flag into the current folder and move it to the models/ folder along with the best validation error
    flag_reader.write_flags_and_BVE(flags, ntwk.best_validation_loss)
    #Put the parameter.txt file into the latest folder from model
    put_param_into_folder()
Example #18
def get_abalone():
    filename = data_folder + 'abalone/abalone.data'
    apu = data_reader.read_data(filename)
    data = np.zeros((len(apu), len(apu[0])))
    for k_row in range(data.shape[0]):
        data[k_row, 1:] = apu[k_row][1:]
        #code categorical sex as 0=male, 1=female
        if apu[k_row][0] == 'M':
            data[k_row, 0] = 0
        else:
            data[k_row, 0] = 1
    return data
Example #19
def test_k_recall(restart_iter, delay, clean_prefix, error_prefix):
    recall = np.zeros(delay)

    aid = AdaptiveDetector()
    aid.it = 0
    aid.fp = 200

    # first read previous clean data
    d5 = read_data(clean_prefix, restart_iter - 5)
    d4 = read_data(clean_prefix, restart_iter - 4)
    d3 = read_data(clean_prefix, restart_iter - 3)
    d2 = read_data(clean_prefix, restart_iter - 2)
    d1 = read_data(clean_prefix, restart_iter - 1)

    for k in range(delay):  # detecting after k iterations
        d = read_data(error_prefix, k)
        recall[k] = aid.detect(d, d1, d2, d3, d4, d5)
        aid.it += 1

        d5 = d4
        d4 = d3
        d3 = d2
        d2 = d1
        d1 = d
    print(recall)
    return recall
def print_data_stats():
    X_train, y_train, X_test, y_test = read_data()
    print("Train data stats:")
    for column in y_train.columns:
        value_counts = y_train[column].value_counts()
        print("%s: Zeroes: %i, Ones: %i, zero perc: %.5f" %
              (column, value_counts[0], value_counts[1], value_counts[0] / (value_counts[1] + value_counts[0])))

    print("Validation data stats:")
    for column in y_test.columns:
        value_counts = y_test[column].value_counts()
        print("%s: Zeroes: %i, Ones: %i, zero perc: %.5f" %
              (column, value_counts[0], value_counts[1], value_counts[0] / (value_counts[1] + value_counts[0])))
Example #21
def main():
    # Read the sentences and alignments from files. Note that we swap the alignments.
    english_sentences, foreign_sentences, global_alignments = data_reader.read_data()

    # For all sentences: Extract phrases, count reorderings and collect other useful statistics.
    count_reorderings(len(english_sentences), english_sentences,
                      foreign_sentences, global_alignments)

    # Write the reordering statistics to files
    output_reordering_statistics()

    # Show some insightful graphs
    show_reorderings_probabilities()
    show_reorderings_comparison_histogram()
    show_histograms_orientation_vs_phrase_length()
Example #22
def training_from_flag(flags):
    """
    Training interface. 1. Read data 2. Initialize network 3. Train network 4. Record flags
    :param flags: The training flags read from command line or parameter.py
    :return: None
    """
    # Get the data
    train_loader, test_loader = data_reader.read_data(flags)
    print("Making network now")

    # Make Network
    ntwk = Network(VAE, flags, train_loader, test_loader)

    # Training process
    print("Start training now...")
    ntwk.train()

    # Do the housekeeping: write the parameters into the folder and use pickle to save the flags object
    write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir)
Example #23
def evaluate_from_model(model_dir):
    """
    Evaluating interface. 1. Retrieve the flags 2. get data 3. initialize network 4. eval
    :param model_dir: The folder to retrieve the model
    :return: None
    """
    # Retrieve the flag object
    print("Retrieving flag object for parameters")
    flags = flag_reader.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    flags.batch_size = 1  # For backprop eval mode, the batch size is always 1

    # Get the data
    train_loader, test_loader = data_reader.read_data(
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        batch_size=flags.batch_size,
        normalize_input=flags.normalize_input,
        data_dir=flags.data_dir)
    print("Making network now")

    # Make Network
    ntwk = Network(Backprop,
                   flags,
                   train_loader,
                   test_loader,
                   inference_mode=True,
                   saved_model=flags.eval_model)

    # Evaluation process
    print("Start eval now:")
    pred_file, truth_file = ntwk.evaluate()

    # Plot the MSE distribution
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")
Example #24
def test_fp(prefix):
    aid = AdaptiveDetector()

    d5 = read_data(prefix, 0)
    d4 = read_data(prefix, 1)
    d3 = read_data(prefix, 2)
    d2 = read_data(prefix, 3)
    d1 = read_data(prefix, 4)

    # start from the 6th frame
    for it in range(5, 1001):
        d = read_data(prefix, it)
        aid.fp += aid.detect(d, d1, d2, d3, d4, d5)
        aid.it += 1
        d5 = d4
        d4 = d3
        d3 = d2
        d2 = d1
        d1 = d
        print("it:", it, " fp:", aid.fp)
Example #25
def test_0_recall(prefix):
    aid = AdaptiveDetector()

    d5 = read_data(prefix, 1)
    d4 = read_data(prefix, 2)
    d3 = read_data(prefix, 3)
    d2 = read_data(prefix, 4)
    d1 = read_data(prefix, 5)

    # start from the 6th frame
    recall = 0
    for it in range(6, 1001):
        d = read_data(prefix, it)

        # insert an error
        x, y = random.randint(0, 479), random.randint(0, 479)
        org = d[x, y]
        truth = False
        if it % 2 == 0:
            truth = True
            d[x, y] = get_flip_error(org)

        hasError = aid.detect(d, d1, d2, d3, d4, d5)
        if hasError and truth:  # true positive
            recall += 1
        if hasError and not truth:  # false positive
            aid.fp += 1

        aid.it += 1

        d[x, y] = org  # restore the correct value before next detection

        d5 = d4
        d4 = d3
        d3 = d2
        d2 = d1
        d1 = d

        print("it:", it, " recall:", recall, " fp:", aid.fp)
Example #26
def evaluatemain(flags, eval_forward):
    #Clear the default graph first for resolving potential name conflicts
    #Set the environment variable for if this is a cpu only script
    if flags.use_cpu_only:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    print("Start Evaluating now...")
    TK = time_recorder.time_keeper(time_keeping_file="data/time_keeper.txt")

    tf.reset_default_graph()
    ckpt_dir = os.path.join(os.path.abspath(''), 'models', flags.model_name)

    decoder_fc_filters, encoder_fc_filters, spectra_fc_filters, conv1d_filters, \
    filter_channel_list, geoboundary, latent_dim, batch_size = network_helper.get_parameters(ckpt_dir)
    batch_size = batch_size[0]  #Get rid of the list
    geometry, spectra, train_init_op, valid_init_op = data_reader.read_data(
        input_size=flags.input_size,
        output_size=300,
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        cross_val=flags.cross_val,
        val_fold=flags.val_fold,
        batch_size=flags.batch_size,
        shuffle_size=flags.shuffle_size,
        data_dir=flags.data_dir,
        normalize_input=flags.normalize_input,
        test_ratio=0.9999)
    #if the input is normalized
    if flags.normalize_input:
        flags.boundary = [-1, 1, -1, 1]
    print("Boundary read from meta_file is ", geoboundary)
    print("batch_size read from meta_file is ", batch_size)
    print("latent_dim read from meta_file is ", latent_dim)
    # make network
    ntwk = VAE_network_maker.VAENetwork(
        geometry,
        spectra,
        model_maker.VAE,
        batch_size,
        latent_dim,
        spectra_fc_filters=spectra_fc_filters,
        decoder_fc_filters=decoder_fc_filters,
        encoder_fc_filters=encoder_fc_filters,
        reg_scale=flags.reg_scale,
        learn_rate=flags.learn_rate,
        decay_step=flags.decay_step,
        decay_rate=flags.decay_rate,
        geoboundary=flags.geoboundary,
        conv1d_filters=conv1d_filters,
        filter_channel_list=filter_channel_list)

    # evaluate the results if the results do not exist or user force to re-run evaluation
    save_file = os.path.join(os.path.abspath(''), 'data',
                             'test_pred_{}.csv'.format(flags.model_name))

    if flags.force_run or (not os.path.exists(save_file)):
        print('Evaluating the model ...')
        #pred_file, truth_file = ntwk.evaluate(valid_init_op, ckpt_dir=ckpt_dir,
        Xpred_file = ntwk.evaluate(valid_init_op,
                                   train_init_op,
                                   ckpt_dir=ckpt_dir,
                                   model_name=flags.model_name,
                                   write_summary=True,
                                   eval_forward=eval_forward,
                                   time_keeper=TK)

        print("Prediction File output at:", Xpred_file)
        unpack_Xpred(Xpred_file, batch_size)
        #pred_file, truth_file = get_spectra_from_geometry(Xpred_file)
    """
Example #27
    warehouses_order_ids = dd(lambda: dict())
    warehouses_drone_numbers = dd(lambda: dict())
    total_order_weight = sum([order['weight'] for order in order_info.values()])
    # print total_order_weight
    # print simulation_parameters[2]
    for (cluster, warehouse) in enumerate(cluster_to_warehouses):
        assigned_orders = [order for (order, assignment) in enumerate(cluster_assignments) if assignment == cluster]
        total_order_weight_for_warehouse = sum([order_info[order]['weight'] for order in assigned_orders])
        # print total_order_weight_for_warehouse
        warehouses_to_orders[warehouse]['orders'] = [order for order in assigned_orders]
        warehouses_order_ids[warehouse] = [order for order in assigned_orders]
        warehouses_to_orders[warehouse]['n_drones'] = floor(float(total_order_weight_for_warehouse)/total_order_weight*simulation_parameters[2])
        warehouses_drone_numbers[warehouse] = [None, None]
        warehouses_drone_numbers[warehouse][0] = floor(float(total_order_weight_for_warehouse)/float(total_order_weight+len(warehouse_info))*simulation_parameters[2])
        warehouses_drone_numbers[warehouse][1] = floor((1-float(total_order_weight_for_warehouse)/float(total_order_weight+len(warehouse_info)))*simulation_parameters[2])
        # print warehouses_to_orders[warehouse]
        # print len(warehouses_to_orders[warehouse])

    #print warehouses_to_orders
    return warehouses_drone_numbers, warehouses_order_ids

if __name__ == "__main__":
    simulation_parameters, weights, warehouse_info, order_info, order_location_matrix = read_data("busy_day.in")
    assign_orders_and_drones_to_warehouses(simulation_parameters, weights, warehouse_info, order_info, order_location_matrix)






Example #28
def decay_learning_rate(learning_rate, decay_rate, epoch):
    return learning_rate / (decay_rate * epoch + 1)


def gradient_descending(theta, x, y, learning_rate, batch_size, epochs=200):
    costs = []
    learning_rates = []
    batches = len(x) // batch_size
    if len(x) % batch_size != 0:
        batches += 1
    for epoch in range(epochs):
        for batch in range(batches):
            start = batch * batch_size % len(x)
            end = min(start + batch_size, len(x))
            t_x = x[start:end]
            t_y = y[start:end]
            theta = theta - learning_rate * get_grade(theta, t_x, t_y)
            cost = get_cost(theta, x, y)
        costs.append(cost)
        learning_rate = decay_learning_rate(learning_rate, 0.99, epoch)
        learning_rates.append(learning_rate)
    show.show_cost(costs)
    show.show_cost(learning_rates)


x, y = data_reader.read_data()
theta = np.zeros((x.shape[1], 1))
learning_rate = 0.0001
gradient_descending(theta, x, y, learning_rate, batch_size=10)
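
The helpers get_grade and get_cost are not defined in this snippet; a minimal sketch assuming plain least-squares linear regression (the formulas are an assumption, not the original code):

import numpy as np

def get_cost(theta, x, y):
    # Mean squared error cost: (1 / 2n) * ||x @ theta - y||^2
    n = len(x)
    return float(np.sum((x @ theta - y) ** 2) / (2 * n))

def get_grade(theta, x, y):
    # Gradient of the cost above with respect to theta.
    n = len(x)
    return x.T @ (x @ theta - y) / n
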
Example #29
def evaluatemain(flags, eval_forward):
    #Clear the default graph first for resolving potential name conflicts
    tf.reset_default_graph()
    TK = time_recorder.time_keeper(time_keeping_file="data/time_keeper.txt")

    ckpt_dir = os.path.join(os.path.abspath(''), 'models', flags.model_name)
    clip, forward_fc_filters, tconv_Fnums, tconv_dims, tconv_filters, \
    n_filter, n_branch, reg_scale = network_helper.get_parameters(ckpt_dir)
    print(ckpt_dir)
    # initialize data reader
    if len(tconv_dims) == 0:
        output_size = fc_filters[-1]
    else:
        output_size = tconv_dims[-1]
    features, labels, train_init_op, valid_init_op = data_reader.read_data(
        input_size=flags.input_size,
        output_size=output_size - 2 * clip,
        x_range=flags.x_range,
        y_range=flags.y_range,
        geoboundary=flags.geoboundary,
        cross_val=flags.cross_val,
        val_fold=flags.val_fold,
        batch_size=flags.batch_size,
        shuffle_size=flags.shuffle_size,
        normalize_input=flags.normalize_input,
        data_dir=flags.data_dir,
        test_ratio=0.01)  #negative test_ratio means test from eval

    #if the input is normalized
    if flags.normalize_input:
        flags.boundary = [-1, 1, -1, 1]

    # make network
    ntwk = Backprop_network_maker.BackPropCnnNetwork(
        features,
        labels,
        model_maker.back_prop_model,
        flags.batch_size,
        clip=flags.clip,
        forward_fc_filters=flags.forward_fc_filters,
        reg_scale=flags.reg_scale,
        learn_rate=flags.learn_rate,
        tconv_Fnums=flags.tconv_Fnums,
        tconv_dims=flags.tconv_dims,
        n_branch=flags.n_branch,
        tconv_filters=flags.tconv_filters,
        n_filter=flags.n_filter,
        decay_step=flags.decay_step,
        decay_rate=flags.decay_rate,
        geoboundary=flags.boundary)

    # evaluate the results if the results do not exist or user force to re-run evaluation
    save_file = os.path.join(os.path.abspath(''), 'data',
                             'test_pred_{}.csv'.format(flags.model_name))
    if flags.force_run or (not os.path.exists(save_file)):
        print('Evaluating the model ...')
        pred_file, truth_file = ntwk.evaluate(
            valid_init_op,
            train_init_op,
            ckpt_dir=ckpt_dir,
            back_prop_epoch=flags.back_prop_epoch,
            stop_thres=flags.stop_threshold,
            verb_step=flags.verb_step,
            model_name=flags.model_name,
            write_summary=True,
            eval_forward=eval_forward,
            time_recorder=TK)
    else:
        pred_file = save_file
        truth_file = os.path.join(os.path.abspath(''), 'data',
                                  'test_truth.csv')

    mae, mse = compare_truth_pred(pred_file, truth_file)

    plt.figure(figsize=(12, 6))
    plt.hist(mse, bins=100)
    plt.xlabel('Mean Squared Error')
    plt.ylabel('cnt')
    plt.suptitle('Backprop (Avg MSE={:.4e})'.format(np.mean(mse)))
    plt.savefig(
        os.path.join(os.path.abspath(''), 'data',
                     'Backprop_{}.png'.format(flags.model_name)))
    plt.show()
    print('Backprop (Avg MSE={:.4e})'.format(np.mean(mse)))
Example #30
        # print total_order_weight_for_warehouse
        warehouses_to_orders[warehouse]['orders'] = [
            order for order in assigned_orders
        ]
        warehouses_order_ids[warehouse] = [order for order in assigned_orders]
        warehouses_to_orders[warehouse]['n_drones'] = floor(
            float(total_order_weight_for_warehouse) / total_order_weight *
            simulation_parameters[2])
        warehouses_drone_numbers[warehouse] = [None, None]
        warehouses_drone_numbers[warehouse][0] = floor(
            float(total_order_weight_for_warehouse) /
            float(total_order_weight + len(warehouse_info)) *
            simulation_parameters[2])
        warehouses_drone_numbers[warehouse][1] = floor(
            (1 - float(total_order_weight_for_warehouse) /
             float(total_order_weight + len(warehouse_info))) *
            simulation_parameters[2])
        # print warehouses_to_orders[warehouse]
        # print len(warehouses_to_orders[warehouse])

    #print warehouses_to_orders
    return warehouses_drone_numbers, warehouses_order_ids


if __name__ == "__main__":
    simulation_parameters, weights, warehouse_info, order_info, order_location_matrix = read_data(
        "busy_day.in")
    assign_orders_and_drones_to_warehouses(simulation_parameters, weights,
                                           warehouse_info, order_info,
                                           order_location_matrix)
def main():
    simulation_parameters, weights, warehouse_info, order_info, order_location_matrix = read_data(sys.argv[1])
    r, c, d, sim_deadline, drone_load = simulation_parameters
    warehouse_drone_nums, warehouse_order_ids = assign_orders_and_drones_to_warehouses(simulation_parameters, weights,
                                                                                      warehouse_info,
                                                                                      order_info, order_location_matrix)
    manager = WarehouseManager(d, warehouse_drone_nums, warehouse_order_ids, warehouse_info, order_info, drone_load)
    manager.start_delivering(order_info)
Example #32
    df = DataFrame(Data, columns=labels)

    feat_num = len(labels)

    plt.figure(figsize=(feat_num, feat_num / 2))
    corrMatrix = df.corr().round(2)
    sn.heatmap(corrMatrix, annot=True)

    plt.tight_layout()

    # bytes_image = io.BytesIO()
    # plt.savefig(bytes_image, format='png', bbox_inches='tight')
    # bytes_image.seek(0)
    plt.savefig("static/corr-{}.png".format(time), bbox_inches='tight')

    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")
    # return bytes_image


if __name__ == "__main__":
    # for-debugging
    from data_reader import read_data

    headers, clear_X, _ = read_data("Agias-Sofias_2018.csv", False, False)
    # headers, clear_X, _ = read_data("Auth_2018.csv", False, False)
    corr_mat(headers, clear_X)
Example #33
        return "Variable: %s" %(str (self))

    def __eq__ (self, obj):
        return hasattr (obj, 'var') and hasattr (obj, 'values') and self.var == obj.var and self.values == obj.values

    def __lt__ (self, other):
        if isinstance (other, Variable):
            return self.var < other.var
        else:
            raise TypeError ('`Other` should be of type `Variable`, but is %s' %(type (other)))
            
    def __hash__(self):
        return hash (self.var + ','.join(self.values))

from data_reader import read_data
data = read_data ('data.txt')

HM = Variable('H3K27me3', ['Present', 'Absent'])
HS = Variable('H2AK126su', ['Present', 'Absent'])
H4A = Variable('H4AK5ac', ['Present', 'Absent'])
HP = Variable('H2AS1ph', ['Present', 'Absent'])
H3A = Variable('H3K27ac', ['Present', 'Absent'])
TRANS = Variable('Transcription', ['Inactive', 'Active'])
    
V = [HM, HS, H4A, HP, H3A, TRANS]

def get_g1 ():

    E = [(HM, HS), (HM, H3A), (H3A, H4A), (H4A, TRANS), (H3A, HP)]

    G = BN (V, E, data)

    return G

def get_g2 ():