Python load_raw_dataの例、utils.load_raw_data Pythonの例

コード例 #1

0

ファイルを表示

 def get_predictions_file(self):
     """Train, predict on the test set and write the predictions to a CSV file.

     The predictions are attached to the ``INDEX_COLUMNS`` of ``self.test``,
     joined onto the raw test file on ``shop_id``/``item_id`` and saved as a
     single ``TARGET_LABEL`` column indexed by ``ID``. The output location is
     whatever ``get_file_path()`` returns. Nothing is returned.
     """
     # Get predictions
     test_x = features_target_split(self.test, self.drop_index)[0]
     preds = self.train_and_predict(self.train, test_x)
     # Compute predictions DataFrame.
     # Take an explicit copy: assigning a new column onto a selection of
     # ``self.test`` is the chained-assignment pattern that pandas flags with
     # SettingWithCopyWarning, and it makes ownership of the data ambiguous.
     preds_df = self.test[INDEX_COLUMNS].copy()
     preds_df[TARGET_LABEL] = preds
     test_raw = utils.fix_shop_id(utils.load_raw_data('test.csv.gz'))
     # Inner join (merge default): only shop/item pairs present on both sides
     # end up in the output.
     preds_df = test_raw.merge(preds_df, on=['shop_id', 'item_id'])
     # Save predictions to file
     logging.info('saving predictions')
     fpath = get_file_path()
     preds_save = preds_df.set_index('ID')[TARGET_LABEL]
     preds_save.to_csv(fpath, header=True)
     # Lazy %-style arguments: the message is only formatted if it is emitted.
     logging.info('%s predictions saved to %s', len(preds_save), fpath)

コード例 #2

0

ファイルを表示

ファイル: TrainGlaze2Comp.py プロジェクト: jwickens/predictglazy

def train():
    """Train the glaze network for 100 epochs and return the trained model.

    Builds the material dictionary and data loaders from the raw data, trains
    with SGD on an L1 loss, checkpoints the model on epoch completion and
    prints the loss after every iteration plus the ``L2`` metric on the
    training and validation loaders after every epoch.
    """
    data = load_raw_data()
    materials = GlazeMaterialDictionary(data)
    net = Net(len(materials))
    loader_train, loader_val = get_data_loaders(materials, data)

    criterion = nn.L1Loss()
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    trainer = create_supervised_trainer(net, sgd, criterion)
    evaluator = create_supervised_evaluator(
        net, {'L2': ignite.metrics.MeanPairwiseDistance()})

    # The checkpoint handler is registered first so it stays ahead of the
    # epoch-level reporting handlers registered below.
    checkpointer = ignite.handlers.ModelCheckpoint(
        './checkpoints/models',
        'chkpoint',
        save_interval=2,
        n_saved=4,
        create_dir=True,
        require_empty=False,
    )
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED, checkpointer, {'glaze_net_3': net})

    def report_iteration_loss(engine):
        # Print the trainer's output for the iteration that just finished.
        print("Epoch[{}] Loss: {:.10f}".format(engine.state.epoch,
                                               engine.state.output))

    def report_training_metrics(engine):
        # Re-evaluate on the training loader and print the L2 metric.
        evaluator.run(loader_train)
        scores = evaluator.state.metrics
        print("Training Results - Epoch: {}  Avg L2: {:.2f}".format(
            engine.state.epoch, scores['L2']))

    def report_validation_metrics(engine):
        # Evaluate on the validation loader and print the L2 metric.
        evaluator.run(loader_val)
        scores = evaluator.state.metrics
        print("Validation Results - Epoch: {}  Avg L2: {:.2f}".format(
            engine.state.epoch, scores['L2']))

    trainer.add_event_handler(Events.ITERATION_COMPLETED,
                              report_iteration_loss)
    trainer.add_event_handler(Events.EPOCH_COMPLETED,
                              report_training_metrics)
    trainer.add_event_handler(Events.EPOCH_COMPLETED,
                              report_validation_metrics)

    trainer.run(loader_train, max_epochs=100)
    return net

コード例 #3

0

ファイルを表示

def plot_allocs_evolution(layoutIndex):
    """Plot how the network's link allocations evolve over its iterations.

    Loads the 'test' raw data, keeps only the layout at ``layoutIndex``,
    restores the saved model and evaluates ``all_timesteps_allocs`` for 24
    steps. Two figures are then shown, each a 4 x 6 grid with one panel per
    step: the first draws every link in a grayscale shade derived from its
    allocation, the second draws in red only the links whose allocation is
    at least 0.5.

    Side effects: overwrites the module-level ``amount_per_batch`` and
    ``general_para.amount_per_batch`` with 1, and blocks on ``plt.show()``
    twice.

    Args:
        layoutIndex: index of the layout to select along the leading axis of
            every raw-data field (and of the weights file, when present).

    Returns:
        None.

    NOTE(review): ``N`` is read from module scope and is not assigned in this
    function - it must already be set by the time this is called.
    """
    global amount_per_batch
    amount_per_batch = 1
    general_para.amount_per_batch = amount_per_batch
    steps_amount = 24
    raw_data = utils.load_raw_data(general_para, model_para, ['test'])
    raw_data = raw_data['test']
    # Keep only the selected layout in every field.
    for key in raw_data.keys():
        raw_data[key] = np.expand_dims(raw_data[key][layoutIndex],
                                       axis=0)  # maintain 1 X N shape
    show_weights = False
    # NOTE(review): the weights file is looked up under
    # valid_data_info["folder"] although the data above is the 'test' split -
    # confirm this is intended.
    try:
        weights_binary = np.load(general_para.data_folder +
                                 general_para.valid_data_info["folder"] +
                                 general_para.weights_file)
        weights_binary = np.expand_dims(weights_binary[layoutIndex],
                                        axis=0)  # 1XN
        show_weights = True
    except FileNotFoundError:
        # No weights file: feed all-ones weights; show_weights stays False so
        # the subset overlay is skipped in both figures below.
        print(
            "No importance weights defining subset: only evaluating sum rate!")
        weights_binary = np.ones([1, N])
    # form one batch (with 1 X 1 X N shape)
    sample_batch = utils.prepare_batches(general_para, raw_data)
    sample_batch = utils.add_appended_indices(general_para, sample_batch)

    g_test, outputs_final, all_timesteps_allocs, placeholders = test_network(
        steps_amount)

    model_loc = general_para.base_dir + model_para.model_loc
    with g_test.as_default():
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, model_loc)
            # Only batch 0 exists (a single layout), hence the [0] indexing.
            evo_allocs = sess.run(
                all_timesteps_allocs,
                feed_dict={
                    placeholders['tx_indices_hash']:
                    sample_batch['tx_indices_hash'][0],
                    placeholders['rx_indices_hash']:
                    sample_batch['rx_indices_hash'][0],
                    placeholders['tx_indices_extract']:
                    sample_batch['tx_indices_ext'][0],
                    placeholders['rx_indices_extract']:
                    sample_batch['rx_indices_ext'][0],
                    placeholders['pair_tx_convfilter_indices']:
                    sample_batch['pair_tx_convfilter_indices'][0],
                    placeholders['pair_rx_convfilter_indices']:
                    sample_batch['pair_rx_convfilter_indices'][0],
                    placeholders['subset_links']:
                    weights_binary
                })
    evo_allocs = np.array(evo_allocs)
    assert np.shape(evo_allocs) == (steps_amount, 1, N)
    evo_allocs = np.squeeze(evo_allocs)  # feedback steps amount X N
    # Figure 1: continuous allocations, one panel per step.
    fig = plt.figure()
    plt.title(
        "Allocation Evolution Over Network Iterations on Layout # {}".format(
            layoutIndex))
    ax = fig.gca()
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    for i in range(steps_amount):
        # 4 x 6 grid = 24 panels, matching steps_amount.
        ax = fig.add_subplot(4, 6, i + 1)
        v_locs = raw_data['locations'][0]
        v_allocs = evo_allocs[i]
        # Columns 0:2 are used as transmitter x/y, columns 2:4 as receiver x/y.
        tx_locs = v_locs[:, 0:2]
        rx_locs = v_locs[:, 2:4]
        plt.scatter(tx_locs[:, 0], tx_locs[:, 1], c='r', label='Tx', s=5)
        plt.scatter(rx_locs[:, 0], rx_locs[:, 1], c='b', label='Rx', s=5)
        for j in range(N):  # plot all activated links
            if (show_weights):  # also plot binary weights subset
                if (np.squeeze(weights_binary)[j] == 1):
                    plt.plot([tx_locs[j, 0], rx_locs[j, 0]],
                             [tx_locs[j, 1], rx_locs[j, 1]],
                             'b',
                             linewidth=2.1)
            line_color = 1 - v_allocs[j]
            if line_color == 0:
                line_color = 0.0  # deal with 0 formatting error problem
            # The stringified number is passed to plt.plot as the format/colour
            # argument.
            plt.plot(
                [tx_locs[j, 0], rx_locs[j, 0]], [tx_locs[j, 1], rx_locs[j, 1]],
                '{}'.format(line_color)
            )  # have to do 1 minus since the smaller the number the darker it gets
        ax.set_xticklabels([])
        ax.set_yticklabels([])
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()

    # Figure 2: quantized view - only links with allocation >= 0.5 are drawn.
    fig = plt.figure()
    plt.title(
        "Quantized Allocation Evolution Over Network Iterations on Layout # {}"
        .format(layoutIndex))
    plt.axis('off')
    for i in range(steps_amount):
        ax = fig.add_subplot(4, 6, i + 1)
        v_locs = raw_data['locations'][0]
        v_allocs = evo_allocs[i]
        tx_locs = v_locs[:, 0:2]
        rx_locs = v_locs[:, 2:4]
        plt.scatter(tx_locs[:, 0], tx_locs[:, 1], c='r', label='Tx', s=5)
        plt.scatter(rx_locs[:, 0], rx_locs[:, 1], c='b', label='Rx', s=5)
        for j in range(N):  # plot all activated links
            if (show_weights):  # also plot binary weights subset
                if (np.squeeze(weights_binary)[j] == 1):
                    plt.plot([tx_locs[j, 0], rx_locs[j, 0]],
                             [tx_locs[j, 1], rx_locs[j, 1]],
                             'b',
                             linewidth=2.1)
            if v_allocs[j] >= 0.5:
                plt.plot([tx_locs[j, 0], rx_locs[j, 0]],
                         [tx_locs[j, 1], rx_locs[j, 1]], 'r')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()
    return

コード例 #4

0

ファイルを表示

def network_inference_weighted(general_para, gains_diagonal,
                               gains_nondiagonal):
    """Run the restored network slot by slot with per-slot weight updates.

    For every time slot the network is evaluated on the first test batch with
    the current binary weights, the allocations are masked by those weights,
    rates are computed, and the weights are updated for the next slot via
    ``utils.proportional_update_weights`` followed by
    ``utils.binary_importance_weights_approx``.

    Side effects: sets the module-level ``N`` and ``amount_per_batch``, sets
    ``general_para.amount_per_batch`` to the number of layouts, and saves
    ``allocs.npy``, ``subsets.npy`` and ``prop_weights.npy`` under
    ``general_para.base_dir``.

    Args:
        general_para: settings object; ``pairs_amount``, ``test_data_info``
            and ``base_dir`` are read from it.
        gains_diagonal: forwarded unchanged to ``utils.compute_rates``.
        gains_nondiagonal: forwarded unchanged to ``utils.compute_rates``.

    Returns:
        Tuple ``(allocs, rates, subsets)``; each array has shape
        ``(layouts, slots_per_layout, N)`` (enforced by assertions).
    """
    global N, amount_per_batch

    unroll_steps = 20
    pairs = general_para.pairs_amount
    layouts_amount = general_para.test_data_info["layouts"]
    slots_per_layout = general_para.test_data_info["slots_per_layout"]
    N = pairs
    print(
        "[ConvNetSumRateV10 network inference weighted] Starting with N={}; {} Layouts; {} Time slots......"
        .format(N, layouts_amount, slots_per_layout))
    # for weighted sumrate case, should be small enough amount of layouts
    general_para.amount_per_batch = layouts_amount
    amount_per_batch = general_para.amount_per_batch

    # load test data
    raw_data = utils.load_raw_data(general_para, model_para, ['test'])
    test_data = utils.add_appended_indices(
        general_para, utils.prepare_batches(general_para, raw_data['test']))
    batches_amount = np.shape(test_data['locations'])[0]
    print("Test batch amount: {}; with {} samples per batch".format(
        batches_amount, general_para.amount_per_batch))

    # create the network graph
    g_test, outputs_final, _, placeholders = test_network(
        steps_amount=unroll_steps)

    # (placeholder name, test_data field) pairs fed on every slot.
    feed_fields = (
        ('tx_indices_hash', 'tx_indices_hash'),
        ('rx_indices_hash', 'rx_indices_hash'),
        ('tx_indices_extract', 'tx_indices_ext'),
        ('rx_indices_extract', 'rx_indices_ext'),
        ('pair_tx_convfilter_indices', 'pair_tx_convfilter_indices'),
        ('pair_rx_convfilter_indices', 'pair_rx_convfilter_indices'),
    )

    model_loc = general_para.base_dir + model_para.model_loc
    with g_test.as_default():
        saver = tf.train.Saver()
        with tf.Session() as sess:
            print("Restoring previously trained model from: {}".format(
                model_loc))
            saver.restore(sess, model_loc)
            total_time = 0
            allocs, rates, subsets, orig_prop_weights = [], [], [], []
            weights_orig = np.ones([layouts_amount, N])
            weights_binary = np.ones([layouts_amount, N])
            for slot in range(1, slots_per_layout + 1):
                if (slot / slots_per_layout * 100) % 20 == 0:
                    print("{}/{} time slots".format(slot, slots_per_layout))
                # Timed section 1: the forward pass (always on batch 0).
                tic = time.time()
                feed = {
                    placeholders[ph_name]: test_data[field][0]
                    for ph_name, field in feed_fields
                }
                feed[placeholders['subset_links']] = weights_binary
                allocs_oneslot = sess.run(outputs_final, feed_dict=feed)
                total_time += time.time() - tic
                # zero out links not to be scheduled
                allocs_oneslot = allocs_oneslot * weights_binary
                orig_prop_weights.append(weights_orig)
                subsets.append(weights_binary)
                allocs.append(allocs_oneslot)
                rates_oneslot = utils.compute_rates(general_para,
                                                    allocs_oneslot,
                                                    gains_diagonal,
                                                    gains_nondiagonal)
                rates.append(rates_oneslot)
                weights_orig = utils.proportional_update_weights(
                    general_para, weights_orig, rates_oneslot)
                # Timed section 2: deriving the next slot's binary weights.
                tic = time.time()
                weights_binary = utils.binary_importance_weights_approx(
                    general_para, weights_orig)
                total_time += time.time() - tic
    print("{} layouts with {} links over {} timeslots, it took {} seconds.".
          format(layouts_amount, N, slots_per_layout, total_time))

    expected_shape = (layouts_amount, slots_per_layout, N)

    def layouts_first(per_slot):
        # Stack the per-slot arrays and swap the first two axes.
        stacked = np.transpose(np.array(per_slot), (1, 0, 2))
        assert np.shape(stacked) == expected_shape, "Wrong shape: {}".format(
            np.shape(stacked))
        return stacked

    allocs = layouts_first(allocs)
    rates = layouts_first(rates)
    subsets = layouts_first(subsets)
    orig_prop_weights = layouts_first(orig_prop_weights)

    for file_name, array in (("allocs.npy", allocs),
                             ("subsets.npy", subsets),
                             ("prop_weights.npy", orig_prop_weights)):
        np.save(
            general_para.base_dir +
            "SanityChecks/Weighted_SumRate_Opt/Conv_V10/" + file_name, array)
    return allocs, rates, subsets

コード例 #5

0

ファイルを表示

def network_inference(general_para, time_complexity=False):
    """Run the restored network over every test batch and time the forward passes.

    Side effects: sets the module-level ``N`` and ``amount_per_batch`` and
    caps ``general_para.amount_per_batch`` at the number of layouts.

    Args:
        general_para: settings object; ``pairs_amount``, ``test_data_info``,
            ``amount_per_batch`` and ``base_dir`` are read from it.
        time_complexity: when true, only the first test layout is kept and
            the layout count is treated as 1.

    Returns:
        Tuple ``(allocs, seconds_per_layout)`` where ``allocs`` is reshaped
        to ``(-1, N)`` and ``seconds_per_layout`` is the accumulated
        ``sess.run`` wall time divided by the layout count.
    """
    global N, amount_per_batch

    unroll_steps = 20
    pairs = general_para.pairs_amount
    layouts_amount = general_para.test_data_info["layouts"]
    N = pairs

    raw_data = utils.load_raw_data(general_para, model_para, ['test'])
    if time_complexity:
        layouts_amount = 1
        test_split = raw_data['test']
        # Cut each listed field down to its first layout, keeping a leading
        # axis of length 1.
        for field_name in ('locations', 'pair_dists', 'tx_indices',
                           'rx_indices', 'pair_tx_convfilter_indices',
                           'pair_rx_convfilter_indices'):
            test_split[field_name] = np.expand_dims(
                test_split[field_name][0], axis=0)

    general_para.amount_per_batch = min(general_para.amount_per_batch,
                                        layouts_amount)
    amount_per_batch = general_para.amount_per_batch
    test_data = utils.add_appended_indices(
        general_para, utils.prepare_batches(general_para, raw_data['test']))
    batches_amount = np.shape(test_data['locations'])[0]

    # create the network graph
    g_test, outputs_final, _, placeholders = test_network(
        steps_amount=unroll_steps)

    # (placeholder name, test_data field) pairs fed for every batch.
    feed_fields = (
        ('tx_indices_hash', 'tx_indices_hash'),
        ('rx_indices_hash', 'rx_indices_hash'),
        ('tx_indices_extract', 'tx_indices_ext'),
        ('rx_indices_extract', 'rx_indices_ext'),
        ('pair_tx_convfilter_indices', 'pair_tx_convfilter_indices'),
        ('pair_rx_convfilter_indices', 'pair_rx_convfilter_indices'),
    )

    model_loc = general_para.base_dir + model_para.model_loc
    with g_test.as_default():
        saver = tf.train.Saver()
        with tf.Session() as sess:
            print("Restoring model from: {}".format(model_loc))
            saver.restore(sess, model_loc)
            allocs = []
            neural_net_time = 0
            for batch_idx in range(batches_amount):
                if ((batch_idx + 1) / batches_amount * 100) % 10 == 0:
                    print("{}/{} minibatch... ".format(batch_idx + 1,
                                                       batches_amount))
                tic = time.time()
                feed = {
                    placeholders[ph_name]: test_data[field][batch_idx]
                    for ph_name, field in feed_fields
                }
                batch_allocs = sess.run(outputs_final, feed_dict=feed)
                neural_net_time += time.time() - tic
                allocs.append(batch_allocs)

    neural_net_time_per_layout = neural_net_time / layouts_amount
    print(
        "{} layouts with {} links, it took {} seconds; {} seconds per layout.".
        format(batches_amount * general_para.amount_per_batch, N,
               neural_net_time, neural_net_time_per_layout))

    allocs = np.array(allocs)
    expected_shape = (batches_amount, general_para.amount_per_batch, N)
    assert np.shape(allocs) == expected_shape, "Wrong shape: {}".format(
        np.shape(allocs))
    # Flatten the batch axis.
    allocs = np.reshape(allocs, [-1, N])

    return allocs, neural_net_time_per_layout

コード例 #6

0

ファイルを表示

    if (args.plot):
        print("Plotting Weights...")
        plot_weights()
        print("Plotting Finished Successfully!")
        exit(0)
    if (args.evoIndex):
        layoutIndex = int(args.evoIndex)
        print(
            "Plotting allocation evolution on layout indexed {} in validation set..."
            .format(layoutIndex))
        plot_allocs_evolution(layoutIndex)
        print("Plotting Finished Successfully!")
        exit(0)

    print("Loading raw data...")
    raw_data = utils.load_raw_data(general_para, model_para,
                                   ['train', 'valid'])
    train_batches_amount = round(
        np.shape(raw_data['train']['locations'])[0] /
        general_para.amount_per_batch)
    valid_batches_amount = round(
        np.shape(raw_data['valid']['locations'])[0] /
        general_para.amount_per_batch)
    FP_valid_active_ratio = np.mean(raw_data['valid']['FP_allocations'])
    FP_valid_std = np.std(raw_data['valid']['FP_allocations'])
    print("Training {} batches; Validation {} batches.".format(
        train_batches_amount, valid_batches_amount))

    g_train, CE, sumrate, train_step, outputs_final, placeholders = train_network(
    )
    model_loc = general_para.base_dir + model_para.model_loc
    with g_train.as_default():

コード例 #7

0

ファイルを表示

    TRAIN_DIR = os.path.join(CW_DIR, 'stage1_train')
    TEST_DIR = os.path.join(CW_DIR, 'stage1_final_test')
    IMG_TYPE = '.png'  # Image type
    IMG_DIR_NAME = 'images'  # Folder name including the image
    MASK_DIR_NAME = 'masks'  # Folder name including the masks
    LOGS_DIR_NAME = 'logs'  # Folder name for TensorBoard summaries
    # Display working/train/test directories.
    print('CW_DIR = {}'.format(CW_DIR))
    print('TRAIN_DIR = {}'.format(TRAIN_DIR))
    print('TEST_DIR = {}'.format(TEST_DIR))

    # Basic properties of images/masks.
    train_df = utils.read_train_data_properties(TRAIN_DIR, IMG_DIR_NAME,
                                                MASK_DIR_NAME)
    test_df = utils.read_test_data_properties(TEST_DIR, IMG_DIR_NAME)
    print('train_df:')
    print(train_df.describe())
    print('')
    print('test_df:')
    print(test_df.describe())

    # Counting unique image shapes.
    df = pd.DataFrame(
        [[x] for x in zip(train_df['img_height'], train_df['img_width'])])
    df[0].value_counts()

    # Read images/masks from files and resize them. Each image and mask
    # is stored as a 3-dim array where the number of channels is 3 and 1,
    # respectively.
    x_train, y_train, y_weights = utils.load_raw_data(train_df)

コード例 #8

0

ファイルを表示

ファイル: main.py プロジェクト: xxFly/SynSetMine-pytorch

    fi = "./data/{}/combined.{}".format(options["dataset"],
                                        options["pretrained_embedding"])
    embedding, index2word, word2index, vocab_size, embed_dim = load_embedding(
        fi)
    logger.info(
        "Finish loading embedding: embed_dim = {}, vocab_size = {}".format(
            embed_dim, vocab_size))
    options["embedding"] = embedding
    options["index2word"] = index2word
    options["word2index"] = word2index
    options["vocabSize"] = vocab_size

    # Load train_set based on different data formats
    fi = "./data/{}/train-cold.{}".format(options["dataset"],
                                          options["data_format"])
    raw_data_string = load_raw_data(fi)
    random.shuffle(raw_data_string)
    train_set_full = element_set.ElementSet("train_set_full",
                                            options["data_format"], options,
                                            raw_data_string)
    print(train_set_full)

    # Load test_set, always in set format
    fi = "./data/{}/test.set".format(args.dataset)
    raw_data_string = load_raw_data(fi)
    random.shuffle(raw_data_string)
    test_set = element_set.ElementSet("test_set", "set", options,
                                      raw_data_string)
    print(test_set)

    # Model training

コード例 #9

0

ファイルを表示

from utils import load_raw_data, load_image_as_vec
from GlazeRecipes import GlazeRecipes
from GlazeMaterialDictionary import MaterialDictionary
from GlazeNet1 import Net
from flask import Flask, request, jsonify
import torch
import random
import string

# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Module-level state built once at import time and shared by the handlers:
# the raw data, the material dictionary and recipes derived from it, and a
# network sized by the number of materials.
raw_data = load_raw_data()
material_dict = MaterialDictionary(raw_data)
recipes = GlazeRecipes(raw_data, material_dict)
model = Net(len(material_dict))
# NOTE(review): checkpoint loading is commented out, so no saved weights are
# loaded here - confirm before relying on this model's predictions.
# model.load_state_dict(torch.load('model_state.pth'))
# presumably Net is a torch.nn.Module and this selects inference mode - verify
model.eval()


@app.route('/next', methods=['GET'])
def get_next_glaze():
    """Handle ``GET /next``. Placeholder: not implemented yet.

    NOTE(review): the body is empty, so this returns ``None``; a real
    response still has to be produced before the route is usable.
    """


@app.route('/image', methods=['GET'])
def get_image():
    """Handle ``GET /image``. Placeholder: not implemented yet.

    NOTE(review): the body is empty, so this returns ``None``; a real
    response still has to be produced before the route is usable.
    """


@app.route('/predict', methods=['POST'])
def predict():