Exemple #1
0
    def test_read_csv(self):
        """
        Test procedure for the function read_csv()

        Reads the two fixture files stored in the ``files`` directory next
        to this test module and checks that each one comes back as a
        non-empty list of non-empty lists of strings equal to the expected
        table (FILE1 / FILE2).
        """
        # Access the files relative to this module, not the user's terminal.
        # __file__ is the module's path; __name__ is only its import name, so
        # splitting it never yields the directory the module lives in.
        parent = os.path.dirname(os.path.abspath(__file__))

        fixtures = (
            ('readcsv1.csv', FILE1),
            ('readcsv2.csv', FILE2),
        )
        for filename, expected in fixtures:
            fpath = os.path.join(parent, 'files', filename)
            table = read_csv(fpath)

            # Exact type checks are deliberate: subclasses are not accepted.
            self.assertEqual(type(table), list)
            self.assertTrue(len(table) > 0 and type(table[0]) == list)
            self.assertTrue(len(table[0]) > 0 and type(table[0][0]) == str)
            self.assertEqual(table, expected)
        print("read_csv complete")
def test_002a():
    """Compare read_csv with pandas on the indoor-temperature data set.

    The file must yield 354 rows. Four columns are coerced to numeric in
    both frames; the first three are then compared element-wise and through
    their ``describe()`` summaries.
    """
    csvfilepath = 'weather-data/indoor-temperature-1617.csv'
    numeric_columns = (
        "Humidity",
        "Temperature",
        "Temperature_range (low)",
        "Temperature_range (high)",
    )

    rows = read_csv(csvfilepath)

    assert len(rows) == 354

    df = pd.DataFrame(rows)
    print(df.head())
    for column in numeric_columns:
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Compare with Pandas CSV reader:
    df2 = pd.read_csv(csvfilepath)
    for column in numeric_columns:
        df2[column] = pd.to_numeric(df2[column], errors='coerce')

    # "Temperature_range (high)" is converted above but not compared.
    for column in numeric_columns[:3]:
        assert all(df[column] == df2[column])
        assert all(df[column].describe() == df2[column].describe())
def test_004():
    """Read the rainfall data set, check its row count and print "mm" statistics."""
    rows = read_csv('weather-data/rainfall-1617.csv')

    assert len(rows) == 353

    frame = pd.DataFrame(rows)
    print(frame.head())

    # Values that cannot be parsed as numbers become NaN.
    frame["mm"] = pd.to_numeric(frame["mm"], errors='coerce')
    print(frame["mm"].describe())
def run_MLP(params):
    """Build an FCN runner from the default config, train it, return the loss.

    Args:
        params: Run parameters. ``params['batch_size']`` is read here as the
            training batch size; the whole object is also handed to
            ``mlp.FCNRunner`` and ``bind_training_dataqueue``.

    Returns:
        Whatever ``iris_runner.run_training()`` returns, or ``None`` when
        the config has no "TRAINING" section (previously that case raised
        UnboundLocalError at the return statement).
    """
    config = config_reader.read_config(utils.abs_path_of("config/default.ini"))

    # Make sure the checkpoint directory exists before the runner is built.
    checkpoint_dir = config.get_rel_path("PATHS", "checkpoint_dir")
    if not os.path.isdir(checkpoint_dir):
        utils.mkdir_recursive(checkpoint_dir)

    iris_runner = mlp.FCNRunner(config, params)

    # Defined up front so the function has a return value without training.
    valid_loss = None

    if "TRAINING" in config:
        with tf.name_scope("train_data"):
            # The batch size comes from params, not from the config file.
            train_batch_size = params['batch_size']
            stratified_task = config.get("TRAINING",
                                         "stratified_sampling",
                                         fallback="")
            trows = csv_reader.read_csv(
                config.get_rel_path("PATHS", "training_file"),
                train_batch_size, stratified_task, config)

        with tf.name_scope("validation_data"):
            vrows = csv_reader.read_csv(
                config.get_rel_path("PATHS", "validation_file"),
                config.getint("TRAINING", "validation_batch_size"))

        iris_runner.bind_training_dataqueue(trows, params)
        iris_runner.bind_validation_dataqueue(vrows)

    # NOTE: the TEST phase is disabled. It used to do:
    #   test_path = config.get_rel_path("TEST", "test_file")
    #   test_rows = csv_reader.read_test_csv(
    #       test_path, int(config["TEST"]["batch_size"]))  # in "test_data" scope
    #   iris_runner.bind_test_dataqueue(test_rows)
    #   ... and, after training, iris_runner.run_test()
    iris_runner.initialize()

    if "TRAINING" in config:
        valid_loss = iris_runner.run_training()

    return valid_loss
Exemple #5
0
 def question_body_stream(self):
     """Yield the tokens produced for each question row of questions.csv.

     Rows are read with ``read_csv`` (columns "Id" and "Body", honouring
     ``self.subsample`` and ``self.limit``) and mapped through
     ``_process_row`` in a pool of ``self.processes`` workers. Rows whose
     token result is falsy are reported on stdout and skipped.
     """
     # Published as a module global before the pool is created; presumably
     # so that _process_row can reach it inside the workers (TODO confirm).
     global tokenizer
     tokenizer = self.tokenizer
     csv = read_csv("output/stackoverflow-data/questions.csv", set(["Id", "Body"]), subsample=self.subsample, limit=self.limit)
     pool = multiprocessing.Pool(self.processes)
     try:
         for Id, tokens in pool.imap(_process_row, csv, chunksize=10000):
             if tokens:
                 yield tokens
             else:
                 # Single-argument parenthesised print works on Python 2 and 3.
                 print("ERROR in row %s" % Id)
     finally:
         # Also runs when the consumer abandons the generator or a worker
         # raises, so the worker processes are never leaked.
         pool.terminate()
def read_csv_calculate_stats(csv_file):
    """Read *csv_file* and return the ``describe()`` summary of "Humidity".

    The file is read with ``read_csv(..., header=True)`` and loaded into a
    DataFrame. Humidity values that cannot be parsed as numbers are coerced
    to NaN before the summary is computed.
    """
    # Read CSV document:
    print("\nAttempting to read {}:".format(csv_file))
    rows = read_csv(csv_file, header=True)

    # Convert CSV document to Pandas dataframe:
    frame = pd.DataFrame(rows)

    # Calculate basic statistics on Humidity:
    print("\nCalculate basic statistics for Humidity:")
    frame["Humidity"] = pd.to_numeric(frame["Humidity"], errors='coerce')
    return frame["Humidity"].describe()
def test_001():
    """Compare read_csv with pandas on the "Baro" column of the barometer file."""
    csvfilepath = 'weather-data/barometer-1617.csv'
    column = "Baro"

    rows = read_csv(csvfilepath)
    assert len(rows) == 355

    # Compare with Pandas CSV reader:
    ours = pd.DataFrame(rows)
    ours[column] = pd.to_numeric(ours[column], errors='coerce')

    theirs = pd.read_csv(csvfilepath)
    theirs[column] = pd.to_numeric(theirs[column], errors='coerce')

    assert all(ours[column] == theirs[column])
    assert all(ours[column].describe() == theirs[column].describe())
def test_003():
    """Read the outside-temperature file, check its size and print column stats."""
    temperature_columns = (
        "Temperature",
        "Temperature_range (low)",
        "Temperature_range (high)",
    )

    rows = read_csv('weather-data/outside-temperature-1617.csv')

    assert len(rows) == 355

    frame = pd.DataFrame(rows)
    print(frame.head())

    # Convert every column first, then print the summaries in the same order.
    for column in temperature_columns:
        frame[column] = pd.to_numeric(frame[column], errors='coerce')
    for column in temperature_columns:
        print(frame[column].describe())
def test_000():
    """Exercise read_csv on the malformed / header-less fixture files.

    Checks the row counts returned for each fixture with ``header=True`` and
    ``header=False``, and compares the second column of the no-header
    fixture with what pandas reads from the same file.
    """
    rows = read_csv(
        'weather-data/testcsverror001_indoor-temperature-1617.csv')

    print(rows)

    assert len(rows) == 6

    csvfilepath = 'weather-data/testnoheader002_indoor-temperature-1617.csv'
    rows = read_csv(csvfilepath, header=True)

    assert len(rows) == 1

    # Compare with Pandas CSV reader:
    df = pd.DataFrame(rows)
    # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 in favour
    # of `on_bad_lines`; confirm the pandas version in use still accepts it.
    df2 = pd.read_csv(csvfilepath, error_bad_lines=False)
    ours_second = df.columns[1]
    theirs_second = df2.columns[1]
    df[ours_second] = pd.to_numeric(df[ours_second], errors='coerce')
    df2[theirs_second] = pd.to_numeric(df2[theirs_second], errors='coerce')
    print(df2)
    print(df)
    assert all(df[ours_second] == df2[theirs_second])

    # (path, header flag, expected number of rows), checked in this order.
    remaining_cases = (
        ('weather-data/testnoheader002_indoor-temperature-1617.csv',
         False, 2),
        ('weather-data/testemptyspaces003_indoor-temperature-1617.csv',
         True, 1),
        ('weather-data/testemptyspaces003_indoor-temperature-1617.csv',
         False, 2),
        ('weather-data/testbadheader004_indoor-temperature-161.csv',
         True, 2),
        ('weather-data/testbadheader004_indoor-temperature-161.csv',
         False, 2),
    )
    for path, has_header, expected_rows in remaining_cases:
        rows = read_csv(path, header=has_header)

        assert len(rows) == expected_rows
Exemple #10
0
    def get_Sapp(self):
        """Read ``self.file`` and split it into header, value sets and rows.

        Returns:
            tuple: ``(attributs_name, attributs_val, rows)`` where
            ``attributs_name`` lists the entries of the first row,
            ``attributs_val[i]`` lists the distinct values found in column
            ``i`` of the remaining rows (in order of first appearance), and
            ``rows`` is every row after the first.
        """
        # Get the data from a csv file
        table = cr.read_csv(self.file)
        rows = table[1:]

        # The first row carries the attribute names
        attributs_name = list(table[0])

        # Collect the distinct values of every attribute; a list membership
        # test is used, so values do not need to be hashable.
        attributs_val = []
        for column in range(len(attributs_name)):
            distinct = []
            for row in rows:
                cell = row[column]
                if cell not in distinct:
                    distinct.append(cell)
            attributs_val.append(distinct)

        return attributs_name, attributs_val, rows
Exemple #11
0
def fillDatabase():
    """Store the entries returned by ``read_csv()`` in the database.

    An entry whose ``belongs_to`` is '99' or '' is stored as a ``State``.
    Every other entry is stored as a ``Constituency``, followed by one
    ``Vote`` per item of its ``parties`` list. A ``Party`` is created when
    ``get_party_by_name`` does not find one.

    The constituency is added once, before its parties are processed, so it
    is persisted even when the parties list is empty or missing. The session
    is closed when the function exits, including on error.
    """
    data = read_csv()
    session = Session()
    try:
        for c in data:
            # Not named `id`, to avoid shadowing the builtin.
            entry_id = c.get('id')
            name = c.get('name')
            belongs_to = c.get('belongs_to')

            if belongs_to == '99' or belongs_to == '':
                session.add(State(id=entry_id, name=name, belongs_to='99'))
                session.commit()
                continue

            session.add(
                Constituency(id=entry_id, name=name, belongs_to=belongs_to))
            session.commit()

            for i in c.get('parties') or ():
                party = get_party_by_name(i.get('name'))
                if party is None:
                    party = Party(name=i.get('name'))
                    session.add(party)
                    # Flush so the new party has its id before it is used.
                    session.flush()

                votes = Vote(
                    party_id=party.id,
                    constituency_id=entry_id,
                    first_provisional_votes=i.get('first').get('provisional'),
                    first_previous_votes=i.get('first').get('previous'),
                    second_provisional_votes=i.get('second').get(
                        'provisional'),
                    second_previous_votes=i.get('second').get('previous'))
                session.add(votes)
                session.commit()
    finally:
        session.close()
#
# Copyright 2017 by InfoMus Lab - DIST - University of Genova, http://www.infomus.org
#

# execfile( 'csv_test.py' )
import csv
from sensor_info import *
from csv_reader import read_csv

# Load the table through the project CSV reader and dump it to stdout.
t = read_csv('sculpture.csv')
# Parenthesised single-argument print is valid on both Python 2 and Python 3
# and prints the same text; the bare print statement is Python 2 only.
print(str(t))


print('Loading infrared sensors info from "file"')
print('End of script')

Exemple #13
0
import sys, os

# Load the default configuration.
config = config_reader.read_config(
    utils.abs_path_of("config/default.ini"))

# Create the checkpoint directory when it does not exist yet.
if not os.path.isdir(config.get_rel_path("PATHS", "checkpoint_dir")):
    utils.mkdir_recursive(config.get_rel_path("PATHS", "checkpoint_dir"))

iris_runner = mlp.FCNRunner(config)

# Training and validation queues are only built when the config has a
# "TRAINING" section.
if "TRAINING" in config:
    with tf.name_scope("train_data"):
        train_batch_size = config.getint("TRAINING", "batch_size")
        stratified_task = config.get(
            "TRAINING", "stratified_sampling", fallback="")
        trows = csv_reader.read_csv(
            config.get_rel_path("PATHS", "training_file"),
            train_batch_size,
            stratified_task,
            config)

    with tf.name_scope("validation_data"):
        vrows = csv_reader.read_csv(
            config.get_rel_path("PATHS", "validation_file"),
            config.getint("TRAINING", "validation_batch_size"))

    iris_runner.bind_training_dataqueue(trows)
    iris_runner.bind_validation_dataqueue(vrows)

# The test queue is read the same way when a "TEST" section is present.
if "TEST" in config:
    test_path = config.get_rel_path("TEST", "test_file")
    with tf.name_scope("test_data"):
        test_rows = csv_reader.read_csv(
            test_path, int(config["TEST"]["batch_size"]))
import matplotlib.pyplot as plt

locations = []


def onclick(event):
    """Record a click position and draw a marker at it.

    Args:
        event: Mouse event exposing ``xdata`` / ``ydata``.

    Clicks that carry no data coordinates are ignored. Matplotlib reports
    ``None`` for both when the pointer is outside the axes, which used to
    append ``[None, None]`` to ``locations`` and pass ``None`` to
    ``plt.scatter``.
    """
    x = event.xdata
    y = event.ydata
    if x is None or y is None:
        return
    plt.scatter(x, y)
    locations.append([x, y])
    plt.show()


# Ask the user for the CSV file and load its rows and column names.
filename = input("Enter CSV filename: ")
csv_data, columns = read_csv(filename)

# Ask how many constant values should be appended to every row.
additional_n = int(input(
    "Your data has {} columns; If you have static data to add enter the number (Enter 0 if you have none): "
    .format(len(columns))))

# Read that many values, one prompt each, after the heading line.
print("Enter your static data: ")
additional_data = [input("::: ") for _ in range(additional_n)]

# Append the static values to every CSV row.
csv_data = [(row + additional_data) for row in csv_data]

# Load the template image chosen by the user.
template_name = input("Enter the Template path: ")

im = plt.imread(template_name)
Exemple #15
0
import csv_reader as cs
import clean_data as cl

if __name__ == "__main__":
    # Function-call form: the bare `print "Main"` statement is a syntax
    # error on Python 3, while this prints the same text on Python 2 and 3.
    print("Main")

# Load the three data sets through the project CSV reader.
enrollments = cs.read_csv('data/enrollments.csv')
daily_engagement = cs.read_csv('data/daily_engagement.csv')
project_submissions = cs.read_csv('data/project_submissions.csv')

# Show the first record of each data set.
print(enrollments[0])
print(daily_engagement[0])
print(project_submissions[0])

# CLEAN UP THE DATA TYPE
# Each record is updated in place: raw field values are replaced by parsed
# dates, numbers or booleans.
for enrollment in enrollments:
    enrollment['cancel_date'] = cl.parse_date(enrollment['cancel_date'])
    enrollment['days_to_cancel'] = cl.parse_maybe_int(enrollment['days_to_cancel'])
    # Only the exact text 'True' counts as true.
    enrollment['is_canceled'] = enrollment['is_canceled'] == 'True'
    enrollment['is_udacity'] = enrollment['is_udacity'] == 'True'
    enrollment['join_date'] = cl.parse_date(enrollment['join_date'])

for engagement_record in daily_engagement:
    # Counts go through float() first, so text such as '1.0' is accepted.
    engagement_record['lessons_completed'] = int(float(engagement_record['lessons_completed']))
    engagement_record['num_courses_visited'] = int(float(engagement_record['num_courses_visited']))
    engagement_record['projects_completed'] = int(float(engagement_record['projects_completed']))
    engagement_record['total_minutes_visited'] = float(engagement_record['total_minutes_visited'])
    engagement_record['utc_date'] = cl.parse_date(engagement_record['utc_date'])

for submission in project_submissions:
    submission['completion_date'] = cl.parse_date(submission['completion_date'])
Exemple #16
0
from csv_reader import read_csv
import pandas as pd
import time

if __name__ == "__main__":
    # Time the project CSV reader against pandas on the same file.
    csvfilepath = 'weather-data/indoor-temperature-1617.csv'

    t1 = time.time()
    csv = read_csv(csvfilepath)
    t2 = time.time()

    assert len(csv) == 354

    df = pd.DataFrame(csv)
    print(df.head())

    # Coerce the measurement columns to numbers (unparseable values -> NaN).
    for column in ("Humidity",
                   "Temperature",
                   "Temperature_range (low)",
                   "Temperature_range (high)"):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Compare with Pandas CSV reader:
    t3 = time.time()
    df2 = pd.read_csv(csvfilepath)
    t4 = time.time()

    own_seconds = t2 - t1
    pandas_seconds = t4 - t3
    print("Reading {} lines in {} compared to Pandas reader in {}".format(
        df2.shape[0], own_seconds, pandas_seconds))
Exemple #17
0
# importing the required module
import csv_reader
import dr_harsha_filter
import plot_data
import moving_average_filter

# Window size for the moving-window filter
window_size = 100

# Name of the CSV file holding the data set
file_name = 'assets/data_set.csv'

# Read the data set from the CSV file
raw_data = csv_reader.read_csv(file_name)

# First entry holds the x values, second entry the y values
x, y = raw_data[0], raw_data[1]

# Plot the raw data
plot_data.plot(x, y, 'Raw Data')

# Run the y values through Dr. Harsha's filter
filtered_data_from_dr_harsha_filter = (
    dr_harsha_filter.filter_by_dr_harsha_filter(y, 0.05))

# Plot the data received from Dr. Harsha's filter
plot_data.plot(
    x, filtered_data_from_dr_harsha_filter, 'Dr. Harsha\'s Filter Data')
import sys

from person import Person
from person import save_people
from csv_reader import read_csv
from csv_reader import Relation

# Path of the CSV file to read, taken from the first command-line argument.
# Running the script without an argument raises IndexError here.
CSV_FILENAME = sys.argv[1]

# Placeholder names; presumably used for people the CSV does not name.
# TODO confirm against the code that consumes them.
COMMON_PARENT_NAME = 'NO NAME PARENT'
NEW_CHILD_NAME = 'NO NAME PERSON'

# Whatever csv_reader.read_csv returns for the given file.
relationships = read_csv(CSV_FILENAME)

# Module-level state, all starting empty; the code that fills it is not
# visible in this excerpt.
people = {}
name_to_person = {}
next_person_id = 1

RELATION_ORDER = {
    Relation.FATHER: 0,
    Relation.MOTHER: 0,
    Relation.HUSBAND: 0,
    Relation.WIFE: 0,
    Relation.SON: 1,
    Relation.DAUGHTER: 1,
    Relation.BROTHER: 10,
    Relation.SISTER: 10,
    Relation.SONS_WIFE: 20,
    Relation.DAUGHTERS_HUSBAND: 20,
    Relation.WIFES_FATHER: 21,
    Relation.WIFES_MOTHER: 21,
Exemple #19
0
def show_graphs():
    """Let the user pick a currency and graph it.

    The values returned by ``cr.read_csv()`` are unpacked into
    ``cr.split_data_coins``. The result is offered to ``select_currency``
    and then passed, with the chosen coin, to ``dg.graph_coin``.
    """
    csv_parts = cr.read_csv()
    data_dict = cr.split_data_coins(*csv_parts)

    chosen_coin = select_currency(data_dict)
    dg.graph_coin(data_dict, chosen_coin)
Exemple #20
0
import sys
from csv_reader import read_csv
import pandas as pd

if __name__ == "__main__":
    # Invokes the CSV reader with the target CSV file as the first argument.
    #
    # Usage: python csv_cli.py [path_to_csv_file]

    if len(sys.argv) <= 1:
        # No CSV document given: show how to call the script.
        print("Usage: csv_cli.py [path_to_csv_file]")

    else:
        # User chosen CSV document:
        csv_file = sys.argv[1]

        # Read CSV document:
        print("\nAttempting to read {}:".format(csv_file))
        csv = read_csv(csv_file, header=True)

        # Convert CSV document to Pandas dataframe:
        print(
            "\nAttempting to convert to Pandas dataframe {}:".format(csv_file))
        df_indoor = pd.DataFrame(csv)
        print(df_indoor.head())
Exemple #21
0
def model_currency(k=10):
    """
    Creates a neural network to model a currency.

    The data of the currency chosen by the user is split into ``k`` parts.
    One model is trained per part, holding that part out for testing, and
    the models' weights are then combined into a single model, each
    weighted by the inverse of its test error. The user is asked for a
    positive whole number of steps and is asked again until one is given.

    :param k: The degree of cross-validation to be performed.
    :return: ``(revenue, error)``: the result of ``m.predict_model`` for the
        combined model and the result of ``m.test_model`` on all the data.
    """

    coin_dict, data = cr.read_csv()

    data = cr.split_data_coins(coin_dict, data)
    coin = select_currency(data)
    data = data[coin]

    model_weights = []
    model_errors = []

    split_data = cr.split_data(data, k)
    # Keep four fields of every entry as floats, in the order 2, 5, 3, 4.
    split_data = [[[float(e[2]), float(e[5]), float(e[3]), float(e[4])] for e in s] for s in split_data]

    print("Modeling neural networks with k-fold cross-validation")

    for i in range(k):
        model = m.create_model(4, [8, 8, 2])

        # Train on every part except the i-th, then test on the i-th.
        raw_data = split_data[:i] + split_data[i+1:]
        training_data = np.array([s[:-1] for s in raw_data])
        m.train_model(model, training_data, np.array([to_expected(s) for s in raw_data]))
        error = m.test_model(model, np.array([split_data[i][:-1]]), np.array([to_expected(split_data[i])]))
        model_weights.append(np.array(m.get_weights(model)))
        model_errors.append(error[0])

    sum_error = sum(1/e for e in model_errors)

    # Scale each model's weights by its share of the total inverse error.
    for idx, error in enumerate(model_errors):

        proportion = (1/error)/sum_error
        model_weights[idx] = proportion * model_weights[idx]

    true_weights = sum(model_weights)
    true_model = m.create_model(4, [8, 8, 2])
    m.set_weights(true_model, true_weights)

    # Keep asking until a positive integer is entered. The original caught
    # only ValueError (`except ValueError or AssertionError` evaluates to
    # `except ValueError`) and left the loop unconditionally, so invalid
    # input was passed on to the prediction.
    while True:
        print("For how long would you like to invest?")
        answer = input("Choice:   ")
        try:
            steps = int(answer)
        except ValueError:
            steps = 0
        if steps > 0:
            break
        print("That was not a valid amount of time.")

    revenue = m.predict_model(true_model, np.array([[split_data[-1][-1]]]), steps)
    error = m.test_model(true_model, np.array([s[:-1] for s in split_data]), np.array([to_expected(s) for s in split_data]))
    multiply = [1, 1]
    for r in revenue:
        multiply[0] *= r[0][0]
        multiply[1] *= r[0][1]
    print("Expected revenue: {}  with error percentage at: {}%".format(multiply, error[0]*100))

    return revenue, error
Exemple #22
0
import os
import csv_reader
import summarizer
import intermediate

# File names, all looked up in / written to the current working directory.
input_filename = 'Piano-Hero-1_Overall_Fudged_Testing_Data.csv'
output_filename = 'Piano-Hero-1-Data-Summary.csv'
intermediate_output_filename = 'Piano-Hero-1-Intermediate-Output.csv'

input_filepath = os.path.join(os.getcwd(), input_filename)
output_filepath = os.path.join(os.getcwd(), output_filename)
intermediate_output_filepath = os.path.join(
    os.getcwd(), intermediate_output_filename)

# Read the input once, then write the summary and the intermediate output.
intermediate_data_dict = csv_reader.read_csv(input_filepath)

summarizer.data_summary(intermediate_data_dict, output_filepath)
intermediate.intermediate_data(
    intermediate_data_dict, intermediate_output_filepath)
import cv2
import csv_reader as csvr
import email_sender as esender
import os

# Script settings
isDebug         = True # When True, the script does not send the actual email to the intended recipients
attendee_list   = csvr.read_csv("src/csv/ListOfAttendees-Test.csv") 
template_path   = "src/template/globalaibootcamp-template.png" # Path of the template image
imageWxH        = (1360, 1024)
font            = cv2.FONT_HERSHEY_SIMPLEX
fontScale       = 2
fontColor       = (0,0,0) # Black
lineType        = 2
certificate_folder = "certificate"

# Email details
emailSubject = "INSERT EVENT NAME"

def generate_email_body(name):
    """Return the HTML email body greeting *name*.

    The body is the auto-generation notice, a "Hi <name>," line and the
    body placeholder, separated by ``<br><br>`` breaks. *name* must be a
    string; it is concatenated, not formatted.
    """
    notice = "***This email and attachment is automatically generated using a script. *** <br><br>"
    greeting = "Hi " + name + ", <br><br>"
    placeholder = "INSERT YOUR EMAIL BODY HERE"
    return notice + greeting + placeholder

def send_certificate():
    """ 
    Iterate through the attendee list from the CSV file,
    Insert the attendee name on the center of the certificate image,
    then opens Outlook Application and sends the email to the recipients that includes the certificate attachment
    """
Exemple #24
0
    parser = argparse.ArgumentParser(description='Send CSV over to API')
    parser.add_argument('--csv',
                        help='csv data file path',
                        required=False,
                        default='data.csv')
    parser.add_argument('--json',
                        help='data template file path',
                        required=False,
                        default='template.json')
    parser.add_argument('--config',
                        help='Config file path',
                        required=False,
                        default='config.yml')
    return parser.parse_args()


if __name__ == '__main__':
    # Read the CSV rows and the JSON template named on the command line,
    # merge them, and POST each resulting document.

    cli_args = parse_cli()
    config = get_config(cli_args)

    data = csv_reader.read_csv(cli_args.csv)
    # Use the --json argument (default 'template.json'); the path was
    # hard-coded before, so the option was silently ignored.
    template = template_reader.json_reader(cli_args.json)

    multi_json_to_post = merge(template, data)

    for json_to_post in multi_json_to_post:
        res = api.post('http://mockbin.com/request', '', '', json_to_post)
        logger.info('Results %s',
                    json.dumps(res.json(), indent=4, sort_keys=True))