Example 1
def parse(data,
          columns=None,
          schema=None,
          guess=False,
          skip_header=False,
          encoding='UTF-8',
          **kw):
    """This method is a generator that returns one CSV row at a time.  To
    do the job it wraps the standard Python's csv parser.
    """
    # Find out the dialect
    if data:
        lines = data.splitlines(True)
        # The first line is a header
        if skip_header is True:
            del lines[0]
        # The dialect
        if guess is True:
            dialect = sniffer.sniff('\n'.join(lines[:10]))
            # Fix the sniffer
            dialect.doublequote = True
            if dialect.delimiter == '' or dialect.delimiter == ' ':
                dialect.delimiter = ','
            reader = read_csv(lines, dialect, **kw)
        else:
            reader = read_csv(lines, **kw)

        # Find out the number of columns, if not specified
        if columns is not None:
            n_columns = len(columns)
        else:
            line = next(reader)
            n_columns = len(line)
            yield line
        # Go
        for line in reader:
            if len(line) != n_columns:
                msg = (
                    'CSV syntax error: wrong number of columns at line %s: %s')
                line_num = getattr(reader, 'line_num', None)
                raise ValueError(msg % (line_num, line))
            if schema is not None:
                datatypes = [schema.get(c, String) for c in columns]
                decoded = []
                for i, datatype in enumerate(datatypes):
                    try:
                        value = datatype.decode(line[i], encoding=encoding)
                    except TypeError:
                        value = datatype.decode(line[i])
                    decoded.append(value)
                line = decoded
            yield line
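A hypothetical usage sketch (the sample data is invented; read_csv, sniffer
and the String datatype are assumed to come from the same library this
function belongs to):

data = "name,age\nalice,30\nbob,25\n"
for row in parse(data, skip_header=True, guess=True):
    print(row)  # one list per CSV record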
Example 2
def parse(data,
          columns=None,
          schema=None,
          guess=False,
          has_header=False,
          encoding='UTF-8',
          **kw):
    """This method is a generator that returns one CSV row at a time.  To
    do the job it wraps the standard Python's csv parser.
    """
    if not data:
        return

    lines = data.splitlines(True)

    # 1. The reader, guess dialect if requested
    if guess is True:
        dialect = sniffer.sniff('\n'.join(lines[:10]))
        # Fix the sniffer
        dialect.doublequote = True
        if dialect.delimiter == '' or dialect.delimiter == ' ':
            dialect.delimiter = ','
        reader = read_csv(lines, dialect, **kw)
    else:
        reader = read_csv(lines, **kw)

    # 2. Find out the number of columns, if not specified
    line = next(reader)
    n_columns = len(columns) if columns is not None else len(line)

    # 3. The header
    if has_header is True:
        datatypes = list(enumerate([Unicode] * n_columns))
        header, line = parse_line(reader, line, datatypes, encoding, n_columns)
        yield header

    # 4. The content
    if schema is not None:
        datatypes = [schema.get(c, String) for c in columns]
    else:
        datatypes = [String] * n_columns
    datatypes = list(enumerate(datatypes))

    while line is not None:
        decoded, line = parse_line(reader, line, datatypes, encoding,
                                   n_columns)
        yield decoded
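The parse_line helper is not shown in this example. A minimal sketch
consistent with how it is called here (decode the current raw row, fetch the
next one, return None when the input is exhausted) could look like the
following. The body mirrors the decode loop from Example 1 and is an
assumption, not the library's actual code:

def parse_line(reader, line, datatypes, encoding, n_columns):
    # Hypothetical helper: decode `line`, then advance the reader.
    if len(line) != n_columns:
        raise ValueError('wrong number of columns: %s' % line)
    decoded = []
    for i, datatype in datatypes:
        try:
            value = datatype.decode(line[i], encoding=encoding)
        except TypeError:
            value = datatype.decode(line[i])
        decoded.append(value)
    return decoded, next(reader, None)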
Example 3
def save_diagram():
    now = datetime.datetime.now()

    for region_id in regionpart_names:
        sample_data = csv.read_csv(f"data/{region_id.replace('/', '_')}.csv")

        plt.plot(sample_data.Datum, sample_data.Erle, "-o")
        plt.plot(sample_data.Datum, sample_data.Beifuss, "-o")
        plt.plot(sample_data.Datum, sample_data.Ambrosia, "-o")
        plt.plot(sample_data.Datum, sample_data.Roggen, "-o")
        plt.plot(sample_data.Datum, sample_data.Esche, "-o")
        plt.plot(sample_data.Datum, sample_data.Birke, "-o")
        plt.plot(sample_data.Datum, sample_data.Graeser, "-o")
        plt.plot(sample_data.Datum, sample_data.Hasel, "-o")
        plt.title(region_id)
        plt.xlabel("Datum")
        plt.ylabel("Belastungsstärke")
        plt.legend([
            "Erle", "Beifuß", "Ambrosia", "Roggen", "Esche", "Birke", "Gräser",
            "Hasel"
        ])
        plt.savefig(
            now.strftime(f"diagrams/%Y/%B/{region_id.replace('/', '_')}.png"))
        # plt.show()
        plt.clf()  # reset the figure so lines do not accumulate across regions
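The function leans on module-level state and a pandas-style reader. A
hypothetical setup (the module alias and region names are invented for
illustration):

import datetime

import matplotlib.pyplot as plt
import pandas as csv  # assumption: csv.read_csv behaves like pandas.read_csv

regionpart_names = ["Bayern/Nord", "Bayern/Sued"]  # hypothetical region ids
save_diagram()

Note that plt.savefig does not create the diagrams/<year>/<month>/
directories; they must exist before the call.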
Example 4
    def __init__(self, filename, timecol=-1, rpmcol=-1, afrcol=-1):
        self.filename = filename
        self.data = csv.read_csv(filename)
        self.data = csv.absolve(self.data)

        self.timecol = timecol
        self.rpmcol = rpmcol
        self.afrcol = afrcol
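This looks like the constructor of a log-reader class, with csv.read_csv and
csv.absolve coming from a project-local csv module rather than the standard
library. A hypothetical instantiation (the class name LogFile and the column
indices are invented):

log = LogFile("dyno_run.csv", timecol=0, rpmcol=2, afrcol=5)
print(log.data)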
Example 5
def clean_data(root_dir, cases):
    for case in cases:
        # Create the splits
        dirs = [x[0] for x in os.walk(root_dir + case)][1:]
        # Go into every subdirectory
        for sub_dir in dirs:
            for root, _, file in os.walk(sub_dir):
                print(root)
                file = sorted(file)
                vector_file = root + "/" + file[-1]
                vectors = csv.read_csv(vector_file, header=-1)
                delete_rows = []
                for i in range(1, len(file), 2):
                    label = [
                        round(
                            float(
                                vectors[vectors[0] == float(file[i][:-8])][j])
                            * 10) for j in range(8, 14)
                    ]
                    if sum(label) == 0:
                        #print(float(file[i][:-8]))
                        #print(root+"/"+file[i]) #rgb
                        #print(root+"/"+file[i-1]) #depth
                        delete_rows.append(file[i][:-8])
                        os.remove(root + "/" + file[i])
                        os.remove(root + "/" + file[i - 1])
                if (len(file) - 1) / 2 != len(delete_rows):
                    last = None
                    last_num = 0
                    clean_file = root + "/vector2.txt"
                    # Text mode with newline='' is what the csv module expects
                    with open(vector_file, "r", newline="") as src, \
                            open(clean_file, "w", newline="") as out:
                        writer = csv.writer(out)
                        for row in csv.reader(src):
                            if row[0] not in delete_rows:
                                writer.writerow(row)
                    with open(clean_file, "r") as fd:
                        last = [l for l in fd][-1]
                        last = last.strip().split(',')
                        last_num = int(last[0])
                        last[8:14] = ['0.0' for _ in range(8, 14)]
                    counter = last_num + 1
                    with open(clean_file, "a") as fd:
                        for _ in range(10):
                            shutil.copy(
                                root + "/" + str(last_num) + "_depth.png",
                                root + "/" + str(counter) + "_depth.png")
                            shutil.copy(
                                root + "/" + str(last_num) + "_rgb.png",
                                root + "/" + str(counter) + "_rgb.png")
                            row = copy.deepcopy(last)
                            row[0] = str(counter)
                            row = ",".join(row) + "\n"
                            counter += 1
                            fd.write(row)
                os.remove(vector_file)
            if len(os.listdir(sub_dir)) == 0:
                os.rmdir(sub_dir)
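A hypothetical invocation, assuming root_dir/<case>/<split>/ directories that
hold N_depth.png / N_rgb.png pairs plus the vector file (the paths are
invented; the function itself additionally needs os, shutil, copy and a csv
module offering both a pandas-style read_csv and stdlib-style reader/writer):

clean_data("dataset/", ["grasp_case_01", "grasp_case_02"])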
Example 6
def main():
    # Read in the training and test data
    train = csv_io.read_csv("data/train.csv")
    # The first column of the training set is the target for the
    # random forest classifier
    target = [x[0] for x in train]
    train = [x[1:] for x in train]
    test = csv_io.read_csv("data/test.csv")  # same reader as the training data

    # Create and train the random forest
    print(train[0])  # train is a plain list of rows, so preview the first one
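The final comment announces a step the snippet never reaches. A minimal sketch
of it, assuming scikit-learn is available and the rows are numeric:

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100)
rf.fit(train, target)
predictions = rf.predict(test)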
Example 7
    def make_instance(r_dir: str, meta: Dict[str, Any], log: bool = False) -> 'Analyzer':
        """
        Factory method of Analyzer which handles validation, using
        this is recommended over using the constructor directly unless
        validation on input has already been performed.
        """
        if not p.isdir(r_dir):
            raise errors.ArgumentError("r_dir", "is not a directory path")

        uuid_meta_filename = p.join(r_dir, "uuid-times.csv")
        if not p.isfile(uuid_meta_filename):
            raise errors.ArgumentError("r_dir", "does not contain uuid-times.csv")

        # read the uuid meta file
        with open(uuid_meta_filename) as metafile:
            uuidmeta = {row['id']: util.TaskMeta.create(**row)
                        for row in read_csv(metafile)}

        return Analyzer(uuidmeta, r_dir, meta, log)
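Since the rows are indexed by column name, read_csv here presumably behaves
like csv.DictReader. A hypothetical call (the path and meta contents are
invented):

analyzer = Analyzer.make_instance("results/run-42", meta={"suite": "nightly"})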
Example 8
import os
import pandas as csv  # assumption: read_csv and the frame operations below are pandas-style
filecsvname = input("name of first file: ")

# "election_data_1.csv", "election_data_2.csv"
data = csv.read_csv(filecsvname)

Total = int(data.index.size)
print("Election Results")
print("Total Votes : ", str(Total))
print("-----------------------------------")

candi_name = data["Candidate"].unique()
candi_name  # get candidate name
candi_count = data.groupby("Candidate")
candidate_count = candi_count.agg({'Voter ID': "count"})["Voter ID"].tolist()

percentage = []
for i in range(len(candidate_count)):
    percentage.append((candidate_count[i] / Total))
lst = []
for i in range(len(candidate_count)):
    x = candi_name[i] + ": " + str("{:.2%}".format(
        percentage[i])) + " (" + str(candidate_count[i]) + ")"
    print(x)
    lst.append(x)

candidate_winner = candi_count.agg({
    'Voter ID': "count"
}).sort_values(by="Voter ID", ascending=False)
Example 9
from sys import argv

import numpy as np
from matplotlib import pyplot as plt

from csv import read_csv  # project-local csv module; the stdlib csv has no read_csv

if __name__ == "__main__":
    args = argv[1:]

    assert len(args) == 2

    L, vf, ff, particles = read_csv(args[0])

    contact_numbers = [p.Z for p in particles]

    count, edges = np.histogram(contact_numbers, bins=list(range(15)))
    contact_numbers = np.average([edges[:-1], edges[1:]], axis=0)
    probability = np.divide(count, len(particles))

    plt.plot(contact_numbers, probability)
    plt.ylabel('Probability $\\mathcal{P}(Z)$')
    plt.xlabel('Contact number $Z$')
    plt.savefig(args[1])
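Run as, for example (the file names are invented):
python plot_contacts.py packing.csv contact_histogram.png
where the project's read_csv is expected to return four values, the last being
a list of particle records whose Z attribute holds the contact number.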
Example 10
    n = 5

    for i in range(n):
        categories, colours = choose_palette()
        for j, colour in enumerate(colours):
            colour_ax.add_artist(
                plt.Circle([i, j], 0.3, color=hsv2rgb(*colour)))
            plt.text(i, j, name_colour(colour), ha='center', va='center')
    plt.xlim(left=-0.5, right=n - 0.5)
    plt.ylim(bottom=-0.5, top=len(colours) - 0.5)
    plt.yticks(ticks=list(range(len(categories))), labels=categories)


if __name__ == '__main__':
    planet_data = read_csv('data/planet_data.csv')

    plt.rcParams['figure.subplot.wspace'] = 0.4
    plt.rcParams['figure.subplot.hspace'] = 0.4

    fig, axes = plt.subplots(ncols=3, nrows=2, figsize=(20, 10))

    (size_ax, density_ax, temp_ax, pressure_ax,
     element_ax, colour_ax) = axes.flatten()

    size_data = np.multiply(planet_data['fpl_rade'], 2 * 4.25875046e-5)
    plot_size_dist(size_data, size_ax)

    density_data = planet_data['fpl_dens']
    plot_density_dist(density_data, density_ax)
Example 11
    def create(meta: util.TaskMeta, path: str, parent: Analyzer) -> 'PackageResultAnalyzer':
        time_stamps_f = p.join(path, 'time_stamps.csv')
        if not p.isfile(time_stamps_f):
            raise errors.InvalidTaskDir(path)

        with open(time_stamps_f, mode='r') as metafile:
            taskmeta = {row['version']: util.PackageResultMeta.create(**row)
                        for row in read_csv(metafile)}

        return PackageResultAnalyzer(meta, taskmeta, path, parent._should_log)
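Like make_instance in Example 7, this factory validates its input directory
before constructing. A hypothetical call (the path is invented):

analyzer = PackageResultAnalyzer.create(meta, "results/run-42/pkg-1.2.3", parent)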