Code example #1
def inference():
    configs = read_config(FLAGS.config_path)
    class_ids = read_config(FLAGS.class_config)

    img_data = read_data(FLAGS.data_dir, int(configs['width']),
                         int(configs['height']))

    cyc_GAN = CycleGAN(configs)
    cyc_GAN.build_model()

    # The Saver must be created after the TensorFlow graph has been built
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, FLAGS.model_path)

        meta_data = meta_initializer(
            int(class_ids[FLAGS.t]),
            [int(configs['width']),
             int(configs['height'])])

        idx = 0

        for img in img_data:

            img_vec = np.expand_dims(img, axis=0)

            if int(class_ids[FLAGS.f]) < int(class_ids[FLAGS.t]):
                mode = "AtoB"
            else:
                mode = "BtoA"

            output_img = sess.run(cyc_GAN.predict(mode),
                                  feed_dict={
                                      cyc_GAN.sample_vector: img_vec,
                                      cyc_GAN.sample_meta_data: meta_data
                                  })

            output_img = sess.run(tf.image.encode_jpeg(tf.squeeze(output_img)))

            with open(join(FLAGS.output_dir, str(idx) + '.jpg'), 'wb') as f:
                f.write(output_img)

            idx += 1
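
The inference() function above relies on a FLAGS object that is defined elsewhere in the project. A minimal sketch of what those definitions could look like (the flag names are taken from the snippet; the defaults and help strings are hypothetical):

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('config_path', 'configs/model.conf', 'model config read by read_config')
flags.DEFINE_string('class_config', 'configs/classes.conf', 'class-id mapping read by read_config')
flags.DEFINE_string('data_dir', 'data/test', 'directory containing the input images')
flags.DEFINE_string('model_path', 'checkpoints/cyclegan.ckpt', 'checkpoint restored by the Saver')
flags.DEFINE_string('output_dir', 'results', 'directory where the generated JPEGs are written')
flags.DEFINE_string('f', 'classA', 'source class key, looked up in class_ids')
flags.DEFINE_string('t', 'classB', 'target class key, looked up in class_ids')
FLAGS = flags.FLAGS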
Code example #2
def main(args):

    # Read data from local disk
    print("[info] Reading data from disk ...")
    X, y = read_data(path=args['data_path'])

    # Normalize data
    # X = min_max_normalization(X=X)

    # Build model
    print("[info] Building model ...")
    model = LinearRegression(args=args)

    # Fitting model with data
    print("[info] learning ...")
    model.learn(X=X, y=y, output_log='logs/linear_regression.csv')

    # Plot the training progress
    plot_progress(path='log', save=True)

    # Evaluate on test set
    model.evaluate()
Code example #3
File: cee.py  Project: NoFishLikeIan/tinbergen
from utils.read_data import read_data
from utils.plot import plot_var
from utils.var import var, select_lr

if __name__ == "__main__":
    df = read_data("data/cee.xls", f="1963Q3")

    lag = select_lr(df, maxlags=15)
    print("LR ratio: ", lag)

    standard_res = var(df, lags=4)
    plot_var(standard_res,
             impulse="FF",
             response=df.columns,
             folder="cee/ex",
             autocorr=True)

    ic = "bic"
    standard_res = var(df, trend="c", lags=2)
    plot_var(standard_res, impulse="FF", folder=f"cee/{ic}")

    new_order = ["Y", "P", "PCOM", "FF", "TR", "NBR", "M1"]
    reindexed = df.reindex(columns=new_order)
    standard_res = var(reindexed, lags=4)
    plot_var(standard_res,
             impulse="FF",
             response=df.columns,
             folder="cee/swapped",
             autocorr=True)
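
The read_data, var, select_lr and plot_var helpers used in example #3 come from the project's own utils package and are not shown here. Purely as a sketch, assuming var wraps statsmodels (whose "c"/"ctt" trend codes and lags/maxlags keywords match the calls above), the two estimation helpers might look roughly like this, with plot_var then plotting impulse responses from the fitted model's irf() results:

import pandas as pd
from statsmodels.tsa.api import VAR


def var(df: pd.DataFrame, lags: int = 4, trend: str = "c"):
    # Fit a reduced-form VAR with `lags` lags on the dataframe's columns.
    return VAR(df).fit(maxlags=lags, trend=trend)


def select_lr(df: pd.DataFrame, maxlags: int = 15) -> int:
    # Stand-in for the project's lag-length selection: the "LR ratio" print above
    # suggests a likelihood-ratio test, while this sketch simply returns the
    # AIC-preferred order from statsmodels' order selection.
    return VAR(df).select_order(maxlags=maxlags).aic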
Code example #4
def train(config):
    """Train a en->fr translation model using WMT data."""
    # Prepare WMT data.
    print("Preparing WMT data in %s" % config.data_dir)
    en_embd_name = ""
    en_train, fr_train, en_dev, fr_dev, _, _, embd_mat_en, embd_mat_fr = data_utils.prepare_wmt_data(
        config.data_dir, config.en_vocab_size, config.fr_vocab_size)
    #config.embedding_en_path, config.embedding_fr_path, 'enc_embedding', 'dec_embedding')

    with tf.Session() as sess:
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)

        # Create model.
        print("Creating %d layers of %d units." %
              (config.num_layers, config.size))
        model = create_model(sess, config, False)

        #if not config.probabilistic:
        # self.kl_rate_update(0.0)

        train_writer = tf.summary.FileWriter(os.path.join(
            FLAGS.model_dir, "train"),
                                             graph=sess.graph)
        dev_writer = tf.summary.FileWriter(os.path.join(
            FLAGS.model_dir, "test"),
                                           graph=sess.graph)

        # Read data into buckets and compute their sizes.
        print("Reading development and training data (limit: %d)." %
              config.max_train_data_size)

        dev_set = read_data.read_data(en_dev, fr_dev, config)
        train_set = read_data.read_data(en_train, fr_train, config,
                                        config.max_train_data_size)
        train_bucket_sizes = [
            len(train_set[b]) for b in xrange(len(config.buckets))
        ]
        train_total_size = float(sum(train_bucket_sizes))

        # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
        # to select a bucket. The length of [scale[i], scale[i+1]] is proportional
        # to the size of the i-th training bucket, as used later.
        train_buckets_scale = [
            sum(train_bucket_sizes[:i + 1]) / train_total_size
            for i in xrange(len(train_bucket_sizes))
        ]
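        # Worked example (illustrative numbers, not from the original data): with
        # bucket sizes [100, 300, 600], train_total_size is 1000.0 and
        # train_buckets_scale becomes [0.1, 0.4, 1.0]; a random draw of 0.35 in
        # the loop below then selects bucket 1.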

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        KL_loss = 0.0
        current_step = model.global_step.eval()
        step_loss_summaries = []
        step_KL_loss_summaries = []
        overall_start_time = time.time()
        while True:
            # Choose a bucket according to data distribution. We pick a random number
            # in [0, 1] and use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([
                i for i in xrange(len(train_buckets_scale))
                if train_buckets_scale[i] > random_number_01
            ])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                train_set, bucket_id)
            _, step_loss, step_KL_loss, _ = model.step(sess, encoder_inputs,
                                                       decoder_inputs,
                                                       target_weights,
                                                       bucket_id, False,
                                                       config.probabilistic)

            if (config.anneal and model.global_step.eval() > config.kl_rate_rise_time
                    and model.kl_rate < 1):
                new_kl_rate = model.kl_rate.eval() + config.kl_rate_rise_factor
                sess.run(model.kl_rate_update,
                         feed_dict={'new_kl_rate': new_kl_rate})

            step_time += (time.time() -
                          start_time) / config.steps_per_checkpoint
            step_loss_summaries.append(
                tf.Summary(value=[
                    tf.Summary.Value(tag="step loss",
                                     simple_value=float(step_loss))
                ]))
            step_KL_loss_summaries.append(
                tf.Summary(value=[
                    tf.Summary.Value(tag="KL step loss",
                                     simple_value=float(step_KL_loss))
                ]))
            loss += step_loss / config.steps_per_checkpoint
            KL_loss += step_KL_loss / config.steps_per_checkpoint
            current_step = model.global_step.eval()

            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % config.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                print(
                    "global step %d learning rate %.4f step-time %.2f perplexity "
                    "%.2f" %
                    (model.global_step.eval(), model.learning_rate.eval(),
                     step_time, perplexity))

                print(
                    "global step %d learning rate %.4f step-time %.2f KL divergence "
                    "%.2f" % (model.global_step.eval(),
                              model.learning_rate.eval(), step_time, KL_loss))
                wall_time = time.time() - overall_start_time
                print("time passed: {0}".format(wall_time))

                # Add perplexity, KL divergence to summary and stats.
                perp_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="train perplexity",
                                     simple_value=perplexity)
                ])
                train_writer.add_summary(perp_summary, current_step)
                KL_loss_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="KL divergence", simple_value=KL_loss)
                ])
                train_writer.add_summary(KL_loss_summary, current_step)
                for i, summary in enumerate(step_loss_summaries):
                    train_writer.add_summary(summary, current_step - 200 + i)
                step_loss_summaries = []
                for i, summary in enumerate(step_KL_loss_summaries):
                    train_writer.add_summary(summary, current_step - 200 + i)
                step_KL_loss_summaries = []

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.model_dir,
                                               FLAGS.model_name + ".ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
                step_time, loss, KL_loss = 0.0, 0.0, 0.0

                # Run evals on development set and print their perplexity.
                eval_losses = []
                eval_KL_losses = []
                eval_bucket_num = 0
                for bucket_id in xrange(len(config.buckets)):
                    if len(dev_set[bucket_id]) == 0:
                        print("  eval: empty bucket %d" % (bucket_id))
                        continue
                    eval_bucket_num += 1
                    encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                        dev_set, bucket_id)
                    _, eval_loss, eval_KL_loss, _ = model.step(
                        sess, encoder_inputs, decoder_inputs, target_weights,
                        bucket_id, True, config.probabilistic)
                    eval_losses.append(float(eval_loss))
                    eval_KL_losses.append(float(eval_KL_loss))
                    eval_ppx = math.exp(
                        float(eval_loss)) if eval_loss < 300 else float("inf")
                    print("  eval: bucket %d perplexity %.2f" %
                          (bucket_id, eval_ppx))

                    eval_perp_summary = tf.Summary(value=[
                        tf.Summary.Value(tag="eval perplexity for bucket {0}".
                                         format(bucket_id),
                                         simple_value=eval_ppx)
                    ])
                    dev_writer.add_summary(eval_perp_summary, current_step)

                mean_eval_loss = sum(eval_losses) / float(eval_bucket_num)
                mean_eval_KL_loss = sum(eval_KL_losses) / float(
                    eval_bucket_num)
                mean_eval_ppx = math.exp(float(mean_eval_loss))
                print("  eval: mean perplexity {0}".format(mean_eval_ppx))

                eval_loss_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="mean eval loss",
                                     simple_value=float(mean_eval_ppx))
                ])
                dev_writer.add_summary(eval_loss_summary, current_step)
                eval_KL_loss_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="mean eval loss",
                                     simple_value=float(mean_eval_KL_loss))
                ])
                dev_writer.add_summary(eval_KL_loss_summary, current_step)
Code example #5
 len_argv = len(sys.argv)
 K = 10
 L = 5
 try:
     flag = sys.argv[1]
     K = int(sys.argv[2])
     L = int(sys.argv[3])
 except:
     pass
 print '*' * 30
 # make generalization hierarchies
 gen_gh_trees()
 #read gentree tax
 att_trees = read_tree()
 #read record
 data = read_data()
 # APA need only GH for transaction
 if flag == 'k':
     get_result_K(att_trees, data)
 elif flag == 'l':
     get_result_L(att_trees, data)
 elif flag == 'data':
     get_result_dataset(att_trees, data)
 elif flag == 'apa':
     if len_argv > 2:
         get_result_one(att_trees, data, K, L)
     else:
         get_result_one(att_trees, data)
 elif flag == '':
     get_result_one(att_trees, data)
 else:
Code example #6

if __name__ == "__main__":
    FLAG = ""
    try:
        FLAG = sys.argv[1]
    except:
        pass
    # read record
    print "*" * 30
    # make generalization hierarchies
    gen_gh_trees()
    # read gentree tax
    ATT_TREES = read_tree()
    # read record
    DATA = read_data()
    # Separation_Gen need only GH for transaction
    if FLAG == "k":
        get_result_k(ATT_TREES, DATA)
    elif FLAG == "l":
        get_result_l(ATT_TREES, DATA)
    elif FLAG == "data":
        get_result_dataset(ATT_TREES, DATA)
    elif FLAG == "":
        # cProfile.run('get_result_one(ATT_TREE, DATA, TYPE_ALG)')
        get_result_one(ATT_TREES, DATA)
    else:
        try:
            INPUT_K = int(FLAG)
            get_result_one(ATT_TREES, DATA)
        except ValueError:
Code example #7
File: anonymizer.py  Project: ytl142857/Mondrian
    INPUT_K = 10
    # read record

    RELAX = True if mode_config.model == "relax" else False

    if RELAX:
        print("Relax Mondrian")
    else:
        print("Strict Mondrian")

    # print("Adult data")
    # INTUITIVE_ORDER is an intuitive order for
    # categorical attributes. This order is produced
    # by the reading (from data set) order.
    DATA, INTUITIVE_ORDER = read_data()
    print(INTUITIVE_ORDER)

    FLAG = mode_config.flag
    if FLAG == 'k':
        get_result_k(DATA)
    elif FLAG == 'qi':
        get_result_qi(DATA)
    elif FLAG == 'data':
        get_result_dataset(DATA)
    elif FLAG == '':
        get_result_one(DATA, mode_config.k)
    else:
        try:
            get_result_one(DATA, 10)
        except ValueError:
Code example #8
File: anonymizer.py  Project: sabahsuhail/Anatomize
"""
run anatomize with given parameters
"""

#!/usr/bin/env python
# coding=utf-8

# by Qiyuan Gong
# [email protected]

from anatomize import anatomize
from utils.read_data import read_data
import sys

if __name__ == '__main__':
    L = 10
    try:
        L = int(sys.argv[1])
    except IndexError:
        pass
    # read record
    RAW_DATA = read_data()
    # remove duplicate items
    print "Begin Anatomizer"
    RESULT = anatomize(RAW_DATA, L)
    print "No. groups in result=%d" % len(RESULT)
    print "Finish Anatomizer!!"
Code example #9
    def initialize_game(self, difficulty):
        self.canvas.clear()
        self.state = "game"
        self.difficulty = difficulty

        self.audio_controller = AudioController(self.difficulty)
        self.audio_controller.toggle()

        # An array that keeps track of all notes currently being played on the MIDI keyboard.
        self.notes_down = []

        # The display for the gems, now bar, and bar lines
        self.canvas.add(Color(1, 1, 1))

        rect = Rectangle(pos=(0, 0),
                         size=(Window.width, Window.height),
                         texture=Image('assets/bg_c.png').texture)

        self.canvas.add(rect)
        self.lane_manager = LaneManager()

        # Display the status of the game through the text labels

        self.canvas.add(Color(1, 1, 1))
        self.hud = Rectangle(pos=(0, Window.height / 1.12),
                             size=(Window.width / 4, Window.height / 9),
                             texture=Image('assets/tophub.png').texture)
        self.canvas.add(self.hud)
        self.canvas.add(Color(1, 1, 1))
        self.hud_score = Rectangle(
            pos=(Window.width / 1.4, Window.height / 1.12),
            size=(Window.width / 3.5, Window.height / 9),
            texture=Image('assets/topscore.png').texture)
        self.canvas.add(self.hud_score)

        self.score_label = score_label()
        self.add_widget(self.score_label)
        self.hp_label = hp_label()
        self.add_widget(self.hp_label)

        self.enemy_times = []
        self.enemy_lanes = []
        self.enemy_types = []
        self.enemy_chords = []

        self.enemy_manager = EnemyManager()
        self.canvas.add(self.enemy_manager)

        if difficulty == "easy":
            read_data("song_annotations/hallelujah_left_hand_test.txt", None,
                      self.enemy_times, self.enemy_lanes, self.enemy_types,
                      self.enemy_chords)
            self.song_length = 140
        elif difficulty == "medium":
            read_data("song_annotations/falling_left_hand_test.txt",
                      "song_annotations/falling_right_hand_test.txt",
                      self.enemy_times, self.enemy_lanes, self.enemy_types,
                      self.enemy_chords)
            self.song_length = 44
        elif difficulty == "hard":
            read_data("song_annotations/hallelujah_left_hand_test.txt",
                      "song_annotations/hallelujah_right_hand_test.txt",
                      self.enemy_times,
                      self.enemy_lanes,
                      self.enemy_types,
                      self.enemy_chords,
                      inversions=True)
            self.song_length = 140

        self.prev_time = time.time()
        self.elapsed_time = 0
        self.note_index = 0

        window_size = 4  # 4 seconds of notes are displayed
        x_scale = Window.width / window_size  # pixels / sec

        # Create the player object which will store and control the state of the game
        self.player = Player(self.hp_label, self.score_label, self.enemy_times,
                             self.enemy_lanes, self.enemy_types,
                             self.enemy_chords, self.enemy_manager,
                             self.audio_controller)
        self.canvas.add(self.player)
        self.player.toggle()
Code example #10
    """Returns a LaTeX pmatrix

    :a: numpy array
    :returns: LaTeX pmatrix as a string
    """
    if len(a.shape) > 2:
        raise ValueError('pmatrix can at most display two dimensions')
    lines = str(a).replace('[', '').replace(']', '').splitlines()
    rv = [r'\begin{pmatrix}']
    rv += ['  ' + ' & '.join(l.split()) + r'\\' for l in lines]
    rv += [r'\end{pmatrix}']
    return '\n'.join(rv)


if __name__ == "__main__":
    df = read_data("data/bp.xls")
    scaled_df, unscaled_df = parse_data(df)

    pre_elas, post_elas = G_Y_multiplier(scaled_df, unscaled_df)

    print(f"Multiplier: {pre_elas} (pre) and {post_elas} (post)")

    dummy_df = scaled_df.copy()
    dummy_df["D"] = 0.
    dummy_df["D"]["1975Q2"] = 1.

    standard_res = var(dummy_df, lags=4)
    plot_var(standard_res,
             impulse="G",
             response=dummy_df.columns,
             folder="bp/base",
Code example #11

if __name__ == '__main__':
    # set K=10 as default
    FLAG = ''
    DATA_SELECT = ''
    # gen_even_BMS_tree(5)
    try:
        DATA_SELECT = sys.argv[1]
        FLAG = sys.argv[2]
    except IndexError:
        pass
    INPUT_K = 10
    if DATA_SELECT == 'i':
        print "INFORMS data"
        DATA = read_data(1)
        ATT_TREE = read_tree(2)
    else:
        print "BMS-WebView data"
        DATA = read_data(0)
        ATT_TREE = read_tree(0)
    # read generalization hierarchy
    # read record
    # remove duplicate items
    for i in range(len(DATA)):
        DATA[i] = list(set(DATA[i]))
    print "Begin Partition"
    if FLAG == 'k':
        get_result_k(ATT_TREE, DATA)
    elif FLAG == 'data':
        get_result_dataset(ATT_TREE, DATA)
Code example #12
File: ramey.py  Project: NoFishLikeIan/tinbergen

def parse_data(df: pd.DataFrame) -> pd.DataFrame:

    new_df = df.copy()
    pop = new_df["POP"]

    for transf_var in transform:
        trans_data = np.log(new_df[transf_var] / pop)
        new_df[name_map[transf_var]] = trans_data

    return new_df.drop(transform, axis=1)


if __name__ == "__main__":
    df = read_data("data/ramey.xls")
    parsed_df = parse_data(df)["1960Q1":]

    restr_var = ["G", "T", "Y"]
    restr_df = parsed_df[restr_var]
    res = var(restr_df, trend="ctt", lags=4)
    plot_var(res, impulse="G", folder="ramey/base")

    aug_var = restr_var + ["CONS", "INV"]
    aug_df = parsed_df[aug_var]
    res = var(aug_df, trend="ctt", lags=4)
    plot_var(res, impulse="G", folder="ramey/aug")

    war_var = ["WAR"] + aug_var
    war_df = parsed_df[war_var]
    res = var(war_df, trend="ctt", lags=4)
Code example #13
File: anonymizer.py  Project: zshwuhan/PAA
 len_argv = len(sys.argv)
 K = 10
 L = 5
 try:
     flag = sys.argv[1]
     K = int(sys.argv[2])
     L = int(sys.argv[3])
 except:
     pass
 print '*'*30
 # make generalization hierarchies
 gen_gh_trees()
 #read gentree tax
 att_trees = read_tree()
 #read record
 data = read_data()
 # PAA need only GH for transaction
 if flag == 'k':
     get_result_K(att_trees, data)
 elif flag == 'l':
     get_result_L(att_trees, data)
 elif flag == 'data':
     get_result_dataset(att_trees, data)
 elif flag == 'paa':
     if len_argv > 2:
         get_result_one(att_trees, data, K, L)
     else:
         get_result_one(att_trees, data)
 elif flag == '':
     get_result_one(att_trees, data)
 else:
Code example #14
 FLAG = ''
 # gen_even_BMS_tree(5)
 try:
     TYPE_ALG = sys.argv[1]
     DATA_SELECT = sys.argv[2]
     FLAG = sys.argv[3]
 except IndexError:
     pass
 INPUT_K = 10
 if TYPE_ALG == 'DA' or TYPE_ALG == 'da':
     print "Begin DA"
 else:
     print "Begin AA"
 if DATA_SELECT == 'i':
     print "INFORMS data"
     DATA = read_data(1)
     ATT_TREE = read_tree(2)
 else:
     print "BMS-WebView data"
     DATA = read_data(0)
     ATT_TREE = read_tree(0)
 print "*" * 30
 # read generalization hierarchy
 # read record
 # remove duplicate items
 # DATA = DATA[:1000]
 # for i in range(len(DATA)):
 #     if len(DATA[i]) <= 40:
 #         DATA[i] = list(set(DATA[i]))
 #     else:
 #         DATA[i] = list(set(DATA[i][:40]))
Code example #15
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from utils.read_data import read_data
from utils.plot import plot_series


if __name__ == '__main__':
    
    df = read_data("data/rz.xls", time_name="quarter")
    
    plot_series(df, folder="rz")

    df.to_csv("data/parsed_rz.xls")  # note: to_csv writes plain CSV text even though the target extension is .xls