def parse(data, columns=None, schema=None, guess=False, skip_header=False,
          encoding='UTF-8', **kw):
    """This method is a generator that returns one CSV row at a time.  To
    do the job it wraps Python's standard csv parser.
    """
    # Find out the dialect
    if data:
        lines = data.splitlines(True)
        # The first line is a header
        if skip_header is True:
            del lines[0]
        # The dialect
        if guess is True:
            dialect = sniffer.sniff('\n'.join(lines[:10]))
            # Fix the sniffer
            dialect.doublequote = True
            if dialect.delimiter == '' or dialect.delimiter == ' ':
                dialect.delimiter = ','
            reader = read_csv(lines, dialect, **kw)
        else:
            reader = read_csv(lines, **kw)
        # Find out the number of columns, if not specified
        if columns is not None:
            n_columns = len(columns)
        else:
            line = reader.next()
            n_columns = len(line)
            yield line
        # Go
        for line in reader:
            if len(line) != n_columns:
                msg = 'CSV syntax error: wrong number of columns at line %d: %s'
                line_num = getattr(reader, 'line_num', None)
                raise ValueError(msg % (line_num, line))
            if schema is not None:
                datatypes = [schema.get(c, String) for c in columns]
                decoded = []
                for i, datatype in enumerate(datatypes):
                    try:
                        value = datatype.decode(line[i], encoding=encoding)
                    except TypeError:
                        value = datatype.decode(line[i])
                    decoded.append(value)
                line = decoded
            yield line
def parse(data, columns=None, schema=None, guess=False, has_header=False,
          encoding='UTF-8', **kw):
    """This method is a generator that returns one CSV row at a time.  To
    do the job it wraps Python's standard csv parser.
    """
    if not data:
        return
    lines = data.splitlines(True)
    # 1. The reader, guess dialect if requested
    if guess is True:
        dialect = sniffer.sniff('\n'.join(lines[:10]))
        # Fix the sniffer
        dialect.doublequote = True
        if dialect.delimiter == '' or dialect.delimiter == ' ':
            dialect.delimiter = ','
        reader = read_csv(lines, dialect, **kw)
    else:
        reader = read_csv(lines, **kw)
    # 2. Find out the number of columns, if not specified
    line = reader.next()
    n_columns = len(columns) if columns is not None else len(line)
    # 3. The header
    if has_header is True:
        datatypes = [Unicode for x in range(n_columns)]
        datatypes = list(enumerate(datatypes))
        header, line = parse_line(reader, line, datatypes, encoding, n_columns)
        yield header
    # 4. The content
    if schema is not None:
        datatypes = [schema.get(c, String) for c in columns]
    else:
        datatypes = [String for x in range(n_columns)]
    datatypes = list(enumerate(datatypes))
    while line is not None:
        decoded, line = parse_line(reader, line, datatypes, encoding, n_columns)
        yield decoded
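# A minimal usage sketch for the parse() generator above; it is not part of
# the original module.  It assumes the enclosing module supplies read_csv,
# sniffer, Unicode, String and parse_line, which parse() references but this
# snippet does not define.  The sample data and column names are made up.
sample = "name,age\nalice,30\nbob,25\n"
for row in parse(sample, columns=['name', 'age'], has_header=True):
    print(row)  # the decoded header is yielded first, then one row per line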
def save_diagram():
    global regionpart_names
    now = datetime.datetime.now()
    # One diagram per region; the columns are pollen types and the German
    # labels are kept as in the source data (Datum = date,
    # Belastungsstärke = pollen load level).
    for region_id in regionpart_names:
        sample_data = csv.read_csv(f"data/{region_id.replace('/', '_')}.csv")
        plt.plot(sample_data.Datum, sample_data.Erle, "-o")
        plt.plot(sample_data.Datum, sample_data.Beifuss, "-o")
        plt.plot(sample_data.Datum, sample_data.Ambrosia, "-o")
        plt.plot(sample_data.Datum, sample_data.Roggen, "-o")
        plt.plot(sample_data.Datum, sample_data.Esche, "-o")
        plt.plot(sample_data.Datum, sample_data.Birke, "-o")
        plt.plot(sample_data.Datum, sample_data.Graeser, "-o")
        plt.plot(sample_data.Datum, sample_data.Hasel, "-o")
        plt.title(region_id)
        plt.xlabel("Datum")
        plt.ylabel("Belastungsstärke")
        plt.legend(["Erle", "Beifuß", "Ambrosia", "Roggen", "Esche", "Birke",
                    "Gräser", "Hasel"])
        plt.savefig(
            now.strftime(f"diagrams/%Y/%B/{region_id.replace('/', '_')}.png"))
        # plt.show()
        plt.clf()  # start a fresh figure so regions don't accumulate on one plot
def __init__(self, filename, timecol=-1, rpmcol=-1, afrcol=-1):
    self.filename = filename
    # Load the log file and clean it up; csv here is expected to be a
    # project-specific helper module (the standard library csv module has
    # no read_csv or absolve).
    self.data = csv.read_csv(filename)
    self.data = csv.absolve(self.data)
    self.timecol = timecol
    self.rpmcol = rpmcol
    self.afrcol = afrcol
def clean_data(root_dir, cases):
    for case in cases:
        # Create the splits
        dirs = [x[0] for x in os.walk(root_dir + case)][1:]
        # Go into every subdirectory
        for sub_dir in dirs:
            for root, _, file in os.walk(sub_dir):
                print(root)
                file = sorted(file)
                vector_file = root + "/" + file[-1]
                vectors = csv.read_csv(vector_file, header=-1)
                delete_rows = []
                for i in range(1, len(file), 2):
                    label = [
                        round(float(vectors[vectors[0] == float(file[i][:-8])][j]) * 10)
                        for j in range(8, 14)
                    ]
                    # Frames whose label vector is all zeros are dropped,
                    # together with their rgb/depth image pair.
                    if sum(label) == 0:
                        delete_rows.append(file[i][:-8])
                        os.remove(root + "/" + file[i])      # rgb
                        os.remove(root + "/" + file[i - 1])  # depth
                if (len(file) - 1) / 2 != len(delete_rows):
                    last = None
                    last_num = 0
                    clean_file = root + "/vector2.txt"
                    # Rewrite the vector file without the deleted rows
                    with open(vector_file, "rb") as input, open(clean_file, "wb") as out:
                        writer = csv.writer(out)
                        for row in csv.reader(input):
                            if row[0] not in delete_rows:
                                writer.writerow(row)
                    with open(clean_file, "r") as fd:
                        last = [l for l in fd][-1]
                    last = last.strip().split(',')
                    last_num = int(last[0])
                    last[8:14] = ['0.0' for _ in range(8, 14)]
                    counter = last_num + 1
                    # Pad the sequence with ten copies of the last frame,
                    # labelled all-zero
                    with open(clean_file, "a") as fd:
                        for _ in range(10):
                            shutil.copy(root + "/" + str(last_num) + "_depth.png",
                                        root + "/" + str(counter) + "_depth.png")
                            shutil.copy(root + "/" + str(last_num) + "_rgb.png",
                                        root + "/" + str(counter) + "_rgb.png")
                            row = copy.deepcopy(last)
                            row[0] = str(counter)
                            row = ",".join(row) + "\n"
                            counter += 1
                            fd.write(row)
                    os.remove(vector_file)
            if len(os.listdir(sub_dir)) == 0:
                os.rmdir(sub_dir)
def main():
    # read in the training and test data
    train = csv_io.read_csv("data/train.csv")
    # the first column of the training set is the target for the random
    # forest classifier
    target = [x[0] for x in train]
    train = [x[1:] for x in train]
    test = csv_io.read_csv("data/test.csv")
    # create and train the random forest
    print(train[0])  # quick sanity check: train is a plain list of feature rows
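# A hedged sketch of the "create and train the random forest" step that the
# comment above points to but the snippet does not include.  It assumes
# scikit-learn's RandomForestClassifier and the train/target/test lists built
# above; the estimator settings are illustrative, not the original author's.
from sklearn.ensemble import RandomForestClassifier

def train_random_forest(train, target, test):
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(train, target)    # learn from the feature rows and their targets
    return rf.predict(test)  # predicted labels for the test rows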
def make_instance(r_dir: str, meta: Dict[str, Any], log: bool = False) -> 'Analyzer':
    """
    Factory method of Analyzer which handles validation; using this is
    recommended over using the constructor directly unless validation of
    the input has already been performed.
    """
    if not p.isdir(r_dir):
        raise errors.ArgumentError("r_dir", "wasn't directory path")
    uuid_meta_filename = p.join(r_dir, "uuid-times.csv")
    if not p.isfile(uuid_meta_filename):
        raise errors.ArgumentError("directory_name", "doesn't contain uuid times")
    # read the uuid meta file
    with open(uuid_meta_filename) as metafile:
        uuidmeta = {
            row['id']: util.TaskMeta.create(**row)
            for row in read_csv(metafile)
        }
    return Analyzer(uuidmeta, r_dir, meta, log)
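# A hypothetical call to the factory above (not from the original source):
# the results directory and metadata are made up, and the directory is
# expected to contain the "uuid-times.csv" file the validation checks require.
analyzer = make_instance("results/run-01", meta={"label": "nightly"}, log=True)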
import os
import pandas as pd

filecsvname = input("name of first file: ")
# "election_data_1.csv", "election_data_2.csv"
# the data is handled as a pandas DataFrame below, so load it with pandas
data = pd.read_csv(filecsvname)

Total = int(data.index.size)
print("Election Results")
print("Total Votes : ", str(Total))
print("-----------------------------------")

# get the candidate names
candi_name = data["Candidate"].unique()

# count the votes cast for each candidate
candi_count = data.groupby("Candidate")
candidate_count = candi_count.agg({'Voter ID': "count"})["Voter ID"].tolist()

percentage = []
for i in range(len(candidate_count)):
    percentage.append(candidate_count[i] / Total)

lst = []
for i in range(len(candidate_count)):
    x = (candi_name[i] + ": " + str("{:.2%}".format(percentage[i]))
         + " (" + str(candidate_count[i]) + ")")
    print(x)
    lst.append(x)

candidate_winner = candi_count.agg({
    'Voter ID': "count"
}).sort_values(by="Voter ID", ascending=False)
from sys import argv

import numpy as np
from matplotlib import pyplot as plt

from csv import read_csv

if __name__ == "__main__":
    args = argv[1:]
    assert len(args) == 2

    L, vf, ff, particles = read_csv(args[0])
    contact_numbers = [p.Z for p in particles]

    # histogram of contact numbers; average the bin edges to get bin centres
    count, edges = np.histogram(contact_numbers, bins=list(range(15)))
    contact_numbers = np.average([edges[:-1], edges[1:]], axis=0)
    probability = np.divide(count, len(particles))

    plt.plot(contact_numbers, probability)
    plt.ylabel('Probability $\\mathcal{P}(Z)$')
    plt.xlabel('Contact number $Z$')
    plt.savefig(args[1])
n = 5
for i in range(n):
    categories, colours = choose_palette()
    for j, colour in enumerate(colours):
        colour_ax.add_artist(
            plt.Circle([i, j], 0.3, color=hsv2rgb(*colour)))
        plt.text(i, j, name_colour(colour), ha='center', va='center')
plt.xlim(left=-0.5, right=n - 0.5)
plt.ylim(bottom=-0.5, top=len(colours) - 0.5)
plt.yticks(ticks=list(range(len(categories))), labels=categories)


if __name__ == '__main__':
    planet_data = read_csv('data/planet_data.csv')

    plt.rcParams['figure.subplot.wspace'] = 0.4
    plt.rcParams['figure.subplot.hspace'] = 0.4
    fig, axes = plt.subplots(ncols=3, nrows=2, figsize=(20, 10))
    size_ax, density_ax, temp_ax, pressure_ax, element_ax, colour_ax = axes.flatten()

    size_data = np.multiply(planet_data['fpl_rade'], 2 * 4.25875046e-5)
    plot_size_dist(size_data, size_ax)

    density_data = planet_data['fpl_dens']
    plot_density_dist(density_data, density_ax)
def create(meta: util.TaskMeta, path: str, parent: Analyzer) -> 'PackageResultAnalyzer':
    time_stamps_f = p.join(path, 'time_stamps.csv')
    if not p.isfile(time_stamps_f):
        raise errors.InvalidTaskDir(path)
    with open(time_stamps_f, mode='r') as metafile:
        taskmeta = {
            row['version']: util.PackageResultMeta.create(**row)
            for row in read_csv(metafile)
        }
    return PackageResultAnalyzer(meta, taskmeta, path, parent._should_log)