def main(unused_argv):
    """Split the example data and write each split out as TFRecords.

    Loads inputs and targets via ``ex.get_data()``, holds out
    ``FLAGS.validation_fraction`` of them for validation, serialises every
    (input, target) pair with ``ex.get_serialized_example`` and then passes
    each serialised split to ``ex.write_dataset`` together with its shard
    count (``FLAGS.train_shards`` / ``FLAGS.valid_shards``).

    Args:
        unused_argv: Ignored; never read by this function.
    """
    features, targets = ex.get_data()

    # Hold out a validation subset.
    train_x, valid_x, train_y, valid_y = train_test_split(
        features, targets, test_size=FLAGS.validation_fraction)

    paired_splits = {
        'train': zip(train_x, train_y),
        'valid': zip(valid_x, valid_y),
    }

    shard_counts = {'train': FLAGS.train_shards, 'valid': FLAGS.valid_shards}

    # Serialise every split up front, before anything is written.
    serialised_splits = {}
    for split_name, pairs in paired_splits.items():
        serialised_splits[split_name] = [
            ex.get_serialized_example(inp, tgt) for inp, tgt in pairs
        ]

    # Write out to TFRecords.
    for split_name, examples in serialised_splits.items():
        ex.write_dataset(
            output_dir=FLAGS.output_dir,
            name=split_name,
            dataset=examples,
            num_shards=shard_counts[split_name],
        )

    tf.logging.info("Finished writing datasets.")
Beispiel #2
0
def test_get_close_data_for_range():
    """Apple close data for the week of April 1 to April 8.

    Smoke test: the request only has to complete; the returned frame is not
    inspected.
    """
    descriptors = {'apple_close': {'type': 'close', 'symbol': 'AAPL'}}

    close_price_df = get_data(descriptors, FIRST_WEEK_IN_APRIL)
Beispiel #3
0
def test_get_boston_high_temperature():
    """High temperature for Boston was 69 degrees Fahrenheit on April 1, 2016.

    Requests a single 'high_temperature' series for Boston over
    ``APRIL_1_2016_DATERANGE`` and checks both the resulting column name and
    the first value.
    """
    data_descriptors = {'boston_temperature': {'type': 'high_temperature', 'city': 'Boston'}}
    temperature_df = get_data(data_descriptors, APRIL_1_2016_DATERANGE)

    # Compare as a plain list. Assuming a pandas DataFrame, ``columns == [...]``
    # is element-wise and yields an array, so the assert would raise (length
    # mismatch / ambiguous truth value) instead of failing cleanly whenever
    # the number of columns is not exactly one.
    assert list(temperature_df.columns) == ['boston_temperature']

    assert temperature_df['boston_temperature'].iloc[0] == 69
Beispiel #4
0
def test_get_multiple_data_sources():
    """Query historical temperatures for different cities.

    Smoke test: two 'high_temperature' descriptors (San Francisco and Boston)
    are requested in one call; the result is not inspected.
    """
    sf_descriptor = {'type': 'high_temperature', 'city': 'San Francisco'}
    boston_descriptor = {'type': 'high_temperature', 'city': 'Boston'}
    metrics = {
        'sf_temperature': sf_descriptor,
        'boston_temperature': boston_descriptor,
    }

    df = get_data(metrics, APRIL_1_2016_DATERANGE)
Beispiel #5
0
def test_get_sf_high_temperature():
    """High temperature for SF was 59 degrees Fahrenheit on April 1, 2016.

    Requests a single 'high_temperature' series for San Francisco over
    ``APRIL_1_2016_DATERANGE`` and checks both the resulting column name and
    the first value.
    """
    data_descriptors = {'sf_temperature': {'type': 'high_temperature', 'city': 'San Francisco'}}
    temperature_df = get_data(data_descriptors, APRIL_1_2016_DATERANGE)

    # Compare as a plain list. Assuming a pandas DataFrame, ``columns == [...]``
    # is element-wise and yields an array, so the assert would raise (length
    # mismatch / ambiguous truth value) instead of failing cleanly whenever
    # the number of columns is not exactly one.
    assert list(temperature_df.columns) == ['sf_temperature']

    assert temperature_df['sf_temperature'].iloc[0] == 59
def test_get_close_data_for_range():
    """Apple close data for the week of April 1 to April 8.

    Only exercises the call; nothing about the returned data is asserted.
    """
    aapl_close = dict(type='close', symbol='AAPL')
    requested = {'apple_close': aapl_close}

    close_price_df = get_data(requested, FIRST_WEEK_IN_APRIL)
Beispiel #7
0
def test_get_amazon_high_price():
    """Fetch AMZN 'close' data for ``APRIL_1_2016_DATERANGE`` and read its first value.

    NOTE(review): the test name says "high price", but the descriptor requests
    type 'close' and is registered under the key 'apple_close'; no expected
    value is asserted yet. Confirm the intended metric and add the assertion.
    """
    amzn_descriptor = {'type': 'close', 'symbol': 'AMZN'}
    frame = get_data({'apple_close': amzn_descriptor}, APRIL_1_2016_DATERANGE)

    # Only verifies that a first 'close' value can be read at all.
    value = frame['close'].iloc[0]
def test_get_amazon_high_price():
    """Read the first 'close' value returned for AMZN on ``APRIL_1_2016_DATERANGE``.

    NOTE(review): despite the name, the request is for type 'close' (not a
    high price) and uses the key 'apple_close' for an AMZN symbol; the value
    read is never asserted. Verify intent before relying on this test.
    """
    descriptors = {'apple_close': dict(type='close', symbol='AMZN')}
    close_price_df = get_data(descriptors, APRIL_1_2016_DATERANGE)

    close_column = close_price_df['close']
    value = close_column.iloc[0]
def test_moon_phase():
    """March 24, 2016 was a full moon.

    Requests the 'moon_phase' series for a one-day range starting on
    2016-03-24 and checks the column name, the index and the phase value.
    """
    data_descriptors = {'moon_phase': {'type': 'moon_phase'}}
    date_range = pd.date_range(start=datetime.date(2016, 3, 24), periods=1)

    moon_phase_df = get_data(data_descriptors, date_range)

    # Compare as plain lists: ``Index == other`` is element-wise and yields an
    # array, so asserting on it raises (length mismatch / ambiguous truth
    # value) instead of failing cleanly unless exactly one element is compared.
    assert list(moon_phase_df.columns) == ['moon_phase']
    assert list(moon_phase_df.index) == list(date_range)

    assert moon_phase_df['moon_phase'].iloc[0] == 14  # full moon
Beispiel #10
0
def test_moon_phase():
    """March 24, 2016 was a full moon.

    Builds a single-day date range for 2016-03-24, requests the 'moon_phase'
    series for it, and verifies the returned column, index and value.
    """
    data_descriptors = {'moon_phase': {'type': 'moon_phase'}}
    date_range = pd.date_range(start=datetime.date(2016, 3, 24), periods=1)

    moon_phase_df = get_data(data_descriptors, date_range)

    # ``Index == other`` compares element-wise and returns an array; taking
    # its truth value only works for exactly one element and raises otherwise.
    # Plain list comparison gives an ordinary bool and a clean failure.
    assert list(moon_phase_df.columns) == ['moon_phase']
    assert list(moon_phase_df.index) == list(date_range)

    assert moon_phase_df['moon_phase'].iloc[0] == 14  # full moon
Beispiel #11
0
def test_get_apple_close_price_data():
    """Apple close price for April 1, 2016 was $109.99.

    Requests the AAPL 'close' series for ``APRIL_1_2016_DATERANGE`` and checks
    the first value of the 'close' column to the nearest cent.
    """
    close_price_parameters = {
        'type': 'close',
        'symbol': 'AAPL',
    }

    close_price_df = get_data({'apple_close': close_price_parameters}, APRIL_1_2016_DATERANGE)

    value = close_price_df['close'].iloc[0]

    # Prices are floats: compare to the nearest cent rather than bit-for-bit,
    # so a stored value such as 109.989998 still matches $109.99.
    assert abs(value - 109.99) < 0.005
def test_get_apple_close_price_data():
    """Apple close price for April 1, 2016 was $109.99.

    Fetches the AAPL 'close' series for ``APRIL_1_2016_DATERANGE`` and
    verifies that the first 'close' value is $109.99 to the nearest cent.
    """
    close_price_parameters = {
        'type': 'close',
        'symbol': 'AAPL',
    }

    close_price_df = get_data({'apple_close': close_price_parameters},
                              APRIL_1_2016_DATERANGE)

    value = close_price_df['close'].iloc[0]

    # Exact ``==`` on a float price is brittle (e.g. 109.989998 != 109.99);
    # accept anything that rounds to the expected cent.
    assert abs(value - 109.99) < 0.005
def test_get_boston_high_temperature():
    """High temperature for Boston was 69 degrees Fahrenheit on April 1, 2016.

    Asks for one 'high_temperature' series (Boston) over
    ``APRIL_1_2016_DATERANGE`` and verifies the column name and first value.
    """
    data_descriptors = {
        'boston_temperature': {
            'type': 'high_temperature',
            'city': 'Boston'
        }
    }
    temperature_df = get_data(data_descriptors, APRIL_1_2016_DATERANGE)

    # ``columns == [...]`` would be an element-wise comparison (assuming a
    # pandas DataFrame) whose truth value raises unless exactly one column is
    # present; a list comparison yields a plain bool and a clean failure.
    assert list(temperature_df.columns) == ['boston_temperature']

    assert temperature_df['boston_temperature'].iloc[0] == 69
def test_get_sf_high_temperature():
    """High temperature for SF was 59 degrees Fahrenheit on April 1, 2016.

    Asks for one 'high_temperature' series (San Francisco) over
    ``APRIL_1_2016_DATERANGE`` and verifies the column name and first value.
    """
    data_descriptors = {
        'sf_temperature': {
            'type': 'high_temperature',
            'city': 'San Francisco'
        }
    }
    temperature_df = get_data(data_descriptors, APRIL_1_2016_DATERANGE)

    # ``columns == [...]`` would be an element-wise comparison (assuming a
    # pandas DataFrame) whose truth value raises unless exactly one column is
    # present; a list comparison yields a plain bool and a clean failure.
    assert list(temperature_df.columns) == ['sf_temperature']

    assert temperature_df['sf_temperature'].iloc[0] == 59
def test_get_multiple_data_sources():
    """Query historical temperatures for different cities.

    Requests 'high_temperature' for San Francisco and Boston in a single
    call over ``APRIL_1_2016_DATERANGE``; the returned value is not checked.
    """
    metrics = {}
    metrics['sf_temperature'] = {
        'type': 'high_temperature',
        'city': 'San Francisco',
    }
    metrics['boston_temperature'] = {
        'type': 'high_temperature',
        'city': 'Boston',
    }

    df = get_data(metrics, APRIL_1_2016_DATERANGE)
Beispiel #16
0
def data():
    """Load the dataset from ``./data`` relative to the current working directory.

    Returns:
        A 2-tuple with the two values unpacked from
        ``get_data(<data dir>, junk=True)``.
    """
    data_dir = os.path.join(os.getcwd(), './data')
    features, labels = get_data(data_dir, junk=True)
    return features, labels