import csv with open('data1.csv', newline='') as source: dialect = csv.Sniffer().sniff(source.readline()) source.seek(0) reader = csv.reader(source, dialect) number_of_columns = len(next(reader)) source.seek(0) with open("data/brazil.csv", "w") as goalkeepers_file: gk_writer = csv.writer(goalkeepers_file) with open("data/players.csv", "w") as players_file: pl_writer = csv.writer(players_file) gk_index = 0 pl_index = 0 position_column = -1 for r in reader: for i in range(number_of_columns): if r[i] == "Nationality": position_column = i source.seek(0) header = next(reader) gk_writer.writerow(header) pl_writer.writerow(header) for r in reader: if r[position_column] == "Brazil": r[0] = gk_index gk_writer.writerow(r) gk_index += 1 else:
def clean_file(self):
    """Parse and validate the uploaded CSV file.

    Sniffs the dialect from the first KB, verifies that the header
    contains the required 'project-task', 'file-name' and 'folder-name'
    columns, then scans the data rows for missing required values.
    Results are stored in ``self.cleaned_data["rows"]`` and
    ``self.cleaned_data["invalid_rows"]``; the file object is returned.

    Raises ``forms.ValidationError`` for oversized, empty, malformed or
    incomplete input.

    NOTE(review): Python 2 style code (``reader.next()``, ``.decode`` on
    byte strings) — confirm target interpreter.
    """
    file = self.cleaned_data.get("file")
    # NOTE(review): ``file._size`` is a private attribute; the public
    # Django API is ``file.size`` — confirm which object arrives here.
    if file._size > self.max_upload_size:
        raise forms.ValidationError(
            _(u"Uploaded file is too large ( > 1MB )"))
    if file:
        try:
            # Sniff the dialect from the first KB, then rewind.
            dialect = csv.Sniffer().sniff(file.read(1024))
            file.seek(0)
            reader = csv.reader(file, dialect)
            try:
                header_row = reader.next()
            except StopIteration:
                raise forms.ValidationError("That CSV file is empty.")
            # Normalise headers: lower-case, trimmed, whitespace -> "_".
            headers = [
                RE_WHITESPACE.sub(
                    "_",
                    cell.decode("utf-8", "ignore").lower().strip())
                for cell in header_row
            ]
            # Check the required fields.
            if len(headers) == 0:
                raise forms.ValidationError(
                    "That CSV file did not contain a valid header line.")
            if not "project-task" in headers:
                raise forms.ValidationError(
                    "Could not find a column labelled 'project-task' in that CSV file."
                )
            if not "file-name" in headers:
                raise forms.ValidationError(
                    "Could not find a column labelled 'file-name' in that CSV file."
                )
            if not "folder-name" in headers:
                raise forms.ValidationError(
                    "Could not find a column labelled 'folder-name' in that CSV file."
                )
            # Go through the rest of the CSV file.
            clean_rows = []
            invalid_rows = []
            invalid_cells = []  # NOTE(review): never used below.
            # Row numbers start at 2: row 1 is the header line.
            for y_index, row in enumerate(reader, 2):
                row = [
                    cell.decode("utf-8", "ignore").strip() for cell in row
                ]
                try:
                    row_data = dict(zip(headers, row))
                except IndexError:
                    # NOTE(review): dict(zip(...)) never raises IndexError,
                    # and ``row_data`` would be unbound here on the first
                    # row — this branch looks dead/buggy.
                    invalid_rows.append((y_index, row_data))
                # ignore blank rows
                if not ''.join(str(x) for x in row):
                    continue
                for x_index, cell_value in enumerate(row):
                    try:
                        headers[x_index]
                    except IndexError:
                        # Row has more cells than headers: ignore extras.
                        continue
                    if headers[x_index]:
                        if not cell_value:
                            # Empty cell under a named header: record and
                            # abort with a validation error.
                            invalid_rows.append(
                                (headers[x_index], y_index))
                            # NOTE(review): plain ValidationError here but
                            # ``forms.ValidationError`` everywhere else —
                            # confirm which is imported.
                            raise ValidationError(
                                u'Missing required value %s for row %s' %
                                (headers[x_index], y_index + 1))
        except csv.Error:
            raise forms.ValidationError("Please upload a valid CSV file.")
        # Check that some rows were parsed.
        # NOTE(review): nothing ever appends to ``clean_rows``, so the
        # stored "rows" list is always empty — verify intent.
        if not clean_rows and invalid_rows:
            raise forms.ValidationError(
                " Workitems could not be imported, due to errors in that CSV file."
            )
    # Store the parsed data.
    self.cleaned_data["rows"] = clean_rows
    self.cleaned_data["invalid_rows"] = invalid_rows
    return file
@requires_segment_info def winnr(pl, segment_info, show_current=True): '''Show window number :param bool show_current: If False do not show current window number. ''' winnr = segment_info['winnr'] if show_current or winnr != vim.current.window.number: return str(winnr) csv_cache = None sniffer = csv.Sniffer() def detect_text_csv_dialect(text, display_name, header_text=None): return ( sniffer.sniff(string(text)), sniffer.has_header(string(header_text or text)) if display_name == 'auto' else display_name, ) CSV_SNIFF_LINES = 100 CSV_PARSE_LINES = 10 if sys.version_info < (2, 7):
def process_csv_lines(csv_lines):
    """Interactively convert raw CSV bank lines into ledger entries.

    Yields one formatted journal-entry string per accepted transaction.
    Relies on module-level state: ``options`` (CLI flags), ``Entry``,
    ``md5sum_hashes`` (digests of already-seen transactions),
    ``prompt_for_value``, ``possible_yesno`` and
    ``get_payee_and_account``.

    :param csv_lines: list of raw CSV text lines to process.
    """
    dialect = None
    try:
        # Guess the dialect from the first three lines; fall back to the
        # csv module defaults when sniffing fails.
        dialect = csv.Sniffer().sniff("".join(csv_lines[:3]),
                                      options.delimiter)
    except csv.Error:
        # can't guess specific dialect, try without one
        pass
    bank_reader = csv.reader(csv_lines, dialect)
    for i, row in enumerate(bank_reader):
        # Skip any empty lines in the input
        if len(row) == 0:
            continue
        entry = Entry(row, csv_lines[i], options)
        # detect duplicate entries in the ledger file and optionally skip
        # or prompt user for action
        #if options.skip_dupes and csv_lines[i].strip() in csv_comments:
        # A negative skip_older_than disables the age cut-off entirely.
        if (options.skip_older_than < 0) or (entry.days_old <=
                                             options.skip_older_than):
            if options.clear_screen:
                # ANSI escape: clear screen and home the cursor.
                print('\033[2J\033[;H')
            print('\n' + entry.prompt())
            if (options.skip_dupes or options.confirm_dupes
                    ) and entry.md5sum in md5sum_hashes:
                value = 'Y'
                # if interactive flag was passed prompt user before
                # skipping transaction
                if options.confirm_dupes:
                    yn_response = prompt_for_value(
                        'Duplicate transaction detected, skip?',
                        possible_yesno, 'Y')
                    if yn_response:
                        value = yn_response
                # Anything but an explicit "no" skips the duplicate.
                if value.upper().strip() not in ('N', 'NO'):
                    continue
            while True:
                payee, account, tags = get_payee_and_account(entry)
                value = 'C'
                if options.entry_review:
                    # need to display ledger formatted entry here
                    #
                    # request confirmation before committing transaction
                    print('\n' + 'Ledger Entry:')
                    print(entry.journal_entry(i + 1, payee, account, tags))
                    yn_response = prompt_for_value(
                        'Commit transaction (Commit, Modify, Skip)?',
                        ('C', 'M', 'S'), value)
                    if yn_response:
                        value = yn_response
                if value.upper().strip() not in ('C', 'COMMIT'):
                    if value.upper().strip() in ('S', 'SKIP'):
                        break
                    else:
                        # "Modify": loop back and re-prompt.
                        continue
                else:
                    # add md5sum of new entry, this helps detect duplicate
                    # entries in same file
                    md5sum_hashes.add(entry.md5sum)
                    break
            if value.upper().strip() in ('S', 'SKIP'):
                continue
            yield entry.journal_entry(i + 1, payee, account, tags)
def process_csv_file(absolute_base_file, table_name_temp, new_table,
                     geom_table_name, geom_table_id, geom_table_columns,
                     geom_table_geom):
    """Import a CSV into PostgreSQL and join it onto a geometry table.

    Creates a temporary table from the CSV, validates its header against
    the selected geometry table id column, bulk-inserts the rows, then
    creates ``new_table`` as an INNER JOIN of the geometry table and the
    temporary table, indexes it, and drops the temporary table.

    Returns a ``(errormsgs_val, status_code)`` tuple: ``('', 200)`` on
    success, or an error message with status ``'400'`` / ``None`` with
    an exception string on failure.

    NOTE(review): Python 2 code (``unicode``, ``print`` statement) with
    several bare ``except:`` clauses; ``f`` is opened but never closed.
    """
    # Create table based on CSV
    import csv
    f = open(absolute_base_file, 'rb')
    no_header_row = False
    with open(absolute_base_file, 'rb') as csvfile:
        # get the type of delimiter
        dialect = csv.Sniffer().sniff(csvfile.read())
    try:
        csv_table = table.Table.from_csv(f,
                                         name=table_name_temp,
                                         no_header_row=no_header_row,
                                         delimiter=dialect.delimiter)
    except:
        status_code = '400'
        errormsgs_val = "Failed to create the table from CSV."
        return errormsgs_val, status_code
    # Slugify column names so they are valid SQL identifiers.
    for idx, column in enumerate(csv_table):
        column.name = slugify(unicode(column.name)).replace('-', '_')
        # Check if the selected value from the dropdown menu matches the
        # first value of the CSV header
        if idx == 0:
            print("column.name.strip()", column.name.strip())
            print("geom_table_id.strip()", geom_table_id.strip())
            if column.name.strip() != geom_table_id.strip():
                errormsgs_val = "The selected value of Layer Type doesn't match the one of the imported layer."
                status_code = '400'
                return errormsgs_val, status_code
    # Check if there are added columns in the CSV
    # (``idx`` is the index of the last column after the loop above).
    if idx < 2:
        errormsgs_val = "The CSV has no added columns. Please add extra columns."
        status_code = '400'
        return errormsgs_val, status_code
    else:
        try:
            sql_table = sql.make_table(csv_table, table_name_temp)
            create_table_sql = sql.make_create_table_statement(
                sql_table, dialect="postgresql")
            # Cap every VARCHAR at 254 characters.
            create_table_sql = re.sub(r'VARCHAR\([0-9]*\)', 'VARCHAR(254)',
                                      create_table_sql)
        except:
            return None, str(sys.exc_info()[0])
    constr = "dbname='{dbname}' user='******' host='{host}' password='******'".format(
        **{
            'dbname': settings.DATABASES['uploaded']['NAME'],
            'user': settings.DATABASES['uploaded']['USER'],
            'host': settings.DATABASES['uploaded']['HOST'],
            'password': settings.DATABASES['uploaded']['PASSWORD']
        })
    conn = psycopg2.connect(constr)
    try:
        # Check if there is already a table with the same name
        cur = conn.cursor()
        sqlstr = "SELECT EXISTS(SELECT * FROM information_schema.tables WHERE table_name='{new_table_name}');".format(
            **{'new_table_name': new_table})
        cur.execute(sqlstr)
        exists = cur.fetchone()[0]
        if exists:
            errormsgs_val = "There is already a layer with this name. Please choose another title."
            status_code = '400'
            return errormsgs_val, status_code
        # If temporary table exists then drop it - the create it and add
        # primary key
        cur.execute('DROP TABLE IF EXISTS %s CASCADE;' % table_name_temp)
        cur.execute(create_table_sql)
        conn.commit()
        sqlstr = "ALTER TABLE IF EXISTS {temp_table} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
            **{'temp_table': table_name_temp})
        cur.execute(sqlstr)
        conn.commit()
    except Exception as e:
        # NOTE(review): failure here is only logged; execution continues.
        logger.error("Error Creating Temporary table %s:%s", table_name_temp,
                     str(e))
    # Copy data to table
    connection_string = "postgresql://%s:%s@%s:%s/%s" % (
        settings.DATABASES['uploaded']['USER'],
        settings.DATABASES['uploaded']['PASSWORD'],
        settings.DATABASES['uploaded']['HOST'],
        settings.DATABASES['uploaded']['PORT'],
        settings.DATABASES['uploaded']['NAME'])
    try:
        engine, metadata = sql.get_connection(connection_string)
    except ImportError:
        return None, str(sys.exc_info()[0])
    conn_eng = engine.connect()
    trans = conn_eng.begin()
    if csv_table.count_rows() > 0:
        insert = sql_table.insert()
        headers = csv_table.headers()
        try:
            conn_eng.execute(
                insert,
                [dict(zip(headers, row)) for row in csv_table.to_rows()])
        except:
            return None, str(sys.exc_info()[0])
    trans.commit()
    conn_eng.close()
    # Create joined table - drop table_name_temp
    new_clmns = []
    for idx, item in enumerate(headers):
        if (
                idx > 1
        ):  # The downloaded layer contains two columns from the global table, which do not include them again
            new_column = "{table_name}.{item}".format(**{
                'table_name': table_name_temp,
                'item': item
            })
            new_clmns.append(new_column)
    added_columns = ', '.join(new_clmns)
    try:
        # Joined table
        sqlstr = "CREATE TABLE {new_table_name} AS (SELECT {geom_table_columns}, {added_columns} FROM {geom_table} INNER JOIN {temp_table} ON (g.{id} = {temp_table}.{id}));".format(
            **{
                'new_table_name': new_table,
                'geom_table': geom_table_name,
                'geom_table_columns': geom_table_columns,
                'temp_table': table_name_temp,
                'id': geom_table_id,
                'added_columns': added_columns
            })
        cur.execute(sqlstr)
        conn.commit()
        sqlstr = "ALTER TABLE IF EXISTS {new_table_name} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
            **{'new_table_name': new_table})
        cur.execute(sqlstr)
        conn.commit()
        # btree index on the join id, GIST index on the geometry column.
        sqlstr = "CREATE INDEX indx_{new_table_name} ON {new_table_name} USING btree({id});".format(
            **{
                'new_table_name': new_table,
                'id': geom_table_id,
            })
        cur.execute(sqlstr)
        conn.commit()
        sqlstr = "CREATE INDEX indx_geom_{new_table_name} ON {new_table_name} USING GIST({geom});".format(
            **{
                'new_table_name': new_table,
                'geom': geom_table_geom,
            })
        cur.execute(sqlstr)
        conn.commit()
    except:
        print "Failed to create joined table."
        logger.error("Failed to create joined table.")
    try:
        sqlstr = "DROP TABLE IF EXISTS {temp_table} CASCADE;".format(
            **{'temp_table': table_name_temp})
        cur.execute(sqlstr)
        conn.commit()
    except:
        logger.error("Failed to drop temporary table.")
    conn.close()
    status_code = 200
    errormsgs_val = ''
    return errormsgs_val, status_code
def make_rwc_popular_index(data_path):
    """Build the RWC-Popular dataset index and write it as JSON.

    Reads the ``rwc-p.csv`` metadata file to map 3-digit piece numbers to
    their audio folder suffix and track number, then, for every track id
    found in the CHORUS annotation folder, records the audio path/checksum
    and the per-annotation (sections, beats, chords, voca_inst) relative
    paths and checksums. The result is dumped to
    ``RWC_POPULAR_INDEX_PATH``.

    :param data_path: root directory containing the ``RWC-Popular`` folder.
    """
    annotations_dir = os.path.join(data_path, 'RWC-Popular', 'annotations')
    metadata_dir = os.path.join(data_path, 'RWC-Popular', 'metadata-master')
    audio_dir = os.path.join(data_path, 'RWC-Popular', 'audio')
    annotations_files = os.listdir(
        os.path.join(annotations_dir, 'AIST.RWC-MDB-P-2001.CHORUS'))
    metadata_file = os.path.join(metadata_dir, 'rwc-p.csv')

    with open(metadata_file, 'r', encoding='utf-8') as fhandle:
        dialect = csv.Sniffer().sniff(fhandle.read(1024))
        fhandle.seek(0)
        reader = csv.reader(fhandle, dialect)
        piece = []
        suffix = []
        track = []
        for line in reader:
            if not line[0] == "Piece No.":  # skip the header row
                # e.g. "No. 12" -> zero-padded 3-digit piece number "012"
                p = '00' + line[0].split('.')[1][1:]
                piece.append(p[len(p) - 3:])
                suffix.append(line[1][1:])
                track.append(line[2][-2:])
    mapping_track = {p: t for p, t in zip(piece, track)}
    mapping_folder = {p: s for p, s in zip(piece, suffix)}

    track_ids = sorted([
        os.path.basename(f).split('.')[0] for f in annotations_files
        if not f == 'README.TXT'
    ])

    rwc_popular_index = {}
    for track_id in track_ids:
        # audio
        audio_folder = 'rwc-p-m{}'.format(mapping_folder[track_id[4:]])
        audio_path = os.path.join(audio_dir, audio_folder)
        audio_track = str(int(mapping_track[track_id[4:]]))
        audio_checksum = md5(
            os.path.join(audio_path, "{}.wav".format(audio_track)))

        annot_checksum = []
        annot_rels = []
        for f in ['CHORUS', 'BEAT', 'CHORD', 'VOCA_INST']:
            # BUG FIX: original used ``f is 'CHORD'`` (identity check that
            # only worked via CPython string interning); use equality.
            if f == 'CHORD':
                # Chord annotations live in a nested folder with a
                # different naming scheme.
                annot_rel = os.path.join(
                    'AIST.RWC-MDB-P-2001.{}'.format(f),
                    'RWC_Pop_Chords',
                    'N{}-M{}-T{}.lab'.format(
                        track_id[-3:],
                        mapping_folder[track_id[-3:]],
                        mapping_track[track_id[-3:]],
                    ),
                )
            else:
                annot_rel = os.path.join(
                    'AIST.RWC-MDB-P-2001.{}'.format(f),
                    '{}.{}.TXT'.format(track_id, f),
                )
            annot_path = os.path.join(annotations_dir, annot_rel)
            if os.path.exists(annot_path):
                annot_checksum.append(md5(annot_path))
                annot_rels.append(os.path.join('annotations', annot_rel))
            else:
                # Missing annotation: record explicit placeholders.
                annot_checksum.append(None)
                annot_rels.append(None)

        rwc_popular_index[track_id] = {
            'audio': (
                os.path.join('audio', audio_folder,
                             "{}.wav".format(audio_track)),
                audio_checksum,
            ),
            'sections': (annot_rels[0], annot_checksum[0]),
            'beats': (annot_rels[1], annot_checksum[1]),
            'chords': (annot_rels[2], annot_checksum[2]),
            'voca_inst': (annot_rels[3], annot_checksum[3]),
        }

    with open(RWC_POPULAR_INDEX_PATH, 'w') as fhandle:
        json.dump(rwc_popular_index, fhandle, indent=2)
def read_csv_cirrus(filename):
    """Read a Cirrus CSV file. Currently exists support for some types
    of CSV files extracted with NoiseTools. There is no support for
    CSVs related with occupational noise.

    If there are NC and NR values in the csv file, they will be stored
    in the returned object with attributes ``nc`` and ``nr``. If the
    CSV file contains time history, you can access to date and time
    with the ``time`` attribute. Also, it is possible to know the
    integration time with the ``integration_time`` attribute.

    :param filename: CSV file name.
    :returns: Pandas dataframe with all data extracted from the CSV
        file.
    :rtype: Pandas dataframe.
    """
    with open(filename, "r") as csvfile:
        csvreader = csvfile.read()
        csvreader = re.sub(r" dB", "", csvreader)  # Clean " dB" from data
        dialect = csv.Sniffer().sniff(csvreader, delimiters=",;")
        separator = dialect.delimiter
        # Guess decimal separator
        decimal_sep = re.search(r"\"\d{2,3}"
                                r"(\.|,)"  # Decimal separator
                                r"\d{1,2}\"", csvreader).group(1)
    # Number of columns = separators in the first line + 1.
    n_cols = re.search("(.+)\n", csvreader).group(1).count(separator) + 1
    if n_cols < 5:
        # Narrow files: statistics-style data, possibly with NR/NC rows.
        unsorted_data = []
        pdindex = ["Z"]
        for i, c in enumerate(csvreader.splitlines()):
            if c[:4] == '"NR"':
                nr = int(re.search(r"\d{2}", c).group(0))
                continue
            elif c[:4] == '"NC"':
                nc = int(re.search(r"\d{2}", c).group(0))
                continue
            if i != 0:
                unsorted_data.append(c.split(separator))
            else:
                # First line determines the weighting labels for the index.
                if n_cols == 3:
                    pdindex.append(c[-2:-1])
                elif n_cols == 4:
                    pdindex.append("A")
                    pdindex.append("C")
        # Create a sorted temporary csv-like file
        # (zip(*...) transposes rows into columns).
        csv_data = list(zip(*unsorted_data))
        temp_csv = ""
        for row in csv_data:
            temp_csv += separator.join(row) + "\n"
        # Then, read it with pandas
        data = pd.read_csv(io.StringIO(temp_csv), sep=separator,
                           decimal=decimal_sep)
        # Assign NC and NR data if they are present
        # NOTE(review): when no NR/NC lines exist, ``nc``/``nr`` are
        # unbound and the bare except silently swallows the NameError.
        # Also, setting ad-hoc attributes on a DataFrame is fragile.
        try:
            data.nc = nc
            data.nr = nr
        except:
            pass
        # If the csv file contains global data from the "Details" tab in
        # NoiseTools, skip row names
        if n_cols != 2:
            data.index = pdindex
    else:
        # Wide files: time-history data with a date + time column pair.
        data = pd.read_csv(filename, parse_dates=[[0, 1]],
                           sep=separator, decimal=decimal_sep)
        # Fix time name column
        en_columns = data.columns.values
        en_columns[0] = "time"
        data.columns = en_columns
        # Guess integration time with statistical mode because the csv could
        # have been cleaned from unwanted noise
        data["time"] = pd.to_datetime(data.time)
        delta = data.time.diff().fillna(0)
        int_time = int(delta.mode()) * 1e-9  # Mode and change from ns to s
        if round(int_time, 2) == 0.06:  # Fix for 1/16 s
            int_time = 0.0625
        data.integration_time = int_time
    return data
def test_doublequote(self): sniffer = csv.Sniffer() dialect = sniffer.sniff(self.header) self.assertFalse(dialect.doublequote) dialect = sniffer.sniff(self.sample2) self.assertTrue(dialect.doublequote)
def run(self, args):
    """Reads in a CSV, performs augmentation, and outputs an augmented CSV.

    Preserves all columns except for the input (augmneted) column.

    Two modes:

    * ``args.interactive`` — REPL loop: augment sentences typed on stdin,
      with on-the-fly recipe/parameter changes.
    * batch — read ``args.csv``, augment ``args.input_column``, and write
      every row (plus its augmentations) to ``args.outfile``.
    """
    if args.interactive:
        print("\nRunning in interactive mode...\n")
        # NOTE(review): eval() of a lookup-table value — safe only while
        # AUGMENTATION_RECIPE_NAMES values are trusted class names.
        augmenter = eval(AUGMENTATION_RECIPE_NAMES[args.recipe])(
            pct_words_to_swap=args.pct_words_to_swap,
            transformations_per_example=args.transformations_per_example,
        )
        print("--------------------------------------------------------")
        while True:
            print(
                '\nEnter a sentence to augment, "q" to quit, "c" to view/change arguments:\n'
            )
            text = input()
            if text == "q":
                break
            elif text == "c":
                # Show current settings, then optionally update them.
                print(
                    f"\nCurrent Arguments:\n\n\t augmentation recipe: {args.recipe}, "
                    f"\n\t pct_words_to_swap: {args.pct_words_to_swap}, "
                    f"\n\t transformations_per_example: {args.transformations_per_example}\n"
                )
                change = input(
                    "Enter 'c' again to change arguments, any other keys to opt out\n"
                )
                if change == "c":
                    print("\nChanging augmenter arguments...\n")
                    recipe = input(
                        "\tAugmentation recipe name ('r' to see available recipes): "
                    )
                    if recipe == "r":
                        print(
                            "\n\twordnet, embedding, charswap, eda, checklist\n"
                        )
                        args.recipe = input(
                            "\tAugmentation recipe name: ")
                    else:
                        args.recipe = recipe
                    args.pct_words_to_swap = float(
                        input(
                            "\tPercentage of words to swap (0.0 ~ 1.0): ")
                    )
                    args.transformations_per_example = int(
                        input("\tTransformations per input example: "))
                    print("\nGenerating new augmenter...\n")
                    # Rebuild the augmenter with the new settings.
                    augmenter = eval(
                        AUGMENTATION_RECIPE_NAMES[args.recipe])(
                            pct_words_to_swap=args.pct_words_to_swap,
                            transformations_per_example=args.
                            transformations_per_example,
                        )
                    print(
                        "--------------------------------------------------------"
                    )
                continue
            elif not text:
                # Ignore empty input lines.
                continue
            print("\nAugmenting...\n")
            print(
                "--------------------------------------------------------")
            for augmentation in augmenter.augment(text):
                print(augmentation, "\n")
            print(
                "--------------------------------------------------------")
    else:
        # Batch mode: fixed seed for reproducible augmentations.
        textattack.shared.utils.set_seed(args.random_seed)
        start_time = time.time()
        if not (args.csv and args.input_column):
            # NOTE(review): ArgumentError must be defined/imported
            # elsewhere in this module — confirm.
            raise ArgumentError(
                "The following arguments are required: --csv, --input-column/--i"
            )
        # Validate input/output paths.
        if not os.path.exists(args.csv):
            raise FileNotFoundError(
                f"Can't find CSV at location {args.csv}")
        if os.path.exists(args.outfile):
            if args.overwrite:
                textattack.shared.logger.info(
                    f"Preparing to overwrite {args.outfile}.")
            else:
                raise OSError(
                    f"Outfile {args.outfile} exists and --overwrite not set."
                )
        # Read in CSV file as a list of dictionaries. Use the CSV sniffer to
        # try and automatically infer the correct CSV format.
        # NOTE(review): csv_file is never closed — consider a with-block.
        csv_file = open(args.csv, "r")
        dialect = csv.Sniffer().sniff(csv_file.readline(), delimiters=";,")
        csv_file.seek(0)
        rows = [
            row for row in csv.DictReader(
                csv_file, dialect=dialect, skipinitialspace=True)
        ]
        # Validate input column.
        row_keys = set(rows[0].keys())
        if args.input_column not in row_keys:
            raise ValueError(
                f"Could not find input column {args.input_column} in CSV. Found keys: {row_keys}"
            )
        textattack.shared.logger.info(
            f"Read {len(rows)} rows from {args.csv}. Found columns {row_keys}."
        )
        augmenter = eval(AUGMENTATION_RECIPE_NAMES[args.recipe])(
            pct_words_to_swap=args.pct_words_to_swap,
            transformations_per_example=args.transformations_per_example,
        )
        output_rows = []
        for row in tqdm.tqdm(rows, desc="Augmenting rows"):
            text_input = row[args.input_column]
            # Keep the original row unless --exclude-original was given.
            if not args.exclude_original:
                output_rows.append(row)
            for augmentation in augmenter.augment(text_input):
                augmented_row = row.copy()
                augmented_row[args.input_column] = augmentation
                output_rows.append(augmented_row)
        # Print to file.
        with open(args.outfile, "w") as outfile:
            csv_writer = csv.writer(outfile,
                                    delimiter=",",
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
            # Write header.
            csv_writer.writerow(output_rows[0].keys())
            # Write rows.
            for row in output_rows:
                csv_writer.writerow(row.values())
        textattack.shared.logger.info(
            f"Wrote {len(output_rows)} augmentations to {args.outfile} in {time.time() - start_time}s."
        )
def __parseCsvFile(self, handle):
    """
    Parse a CSV file. Does not reset the file handle to start.

    @arg handle: CSV file. Must be a seekable binary file object.
    @type handle: file object

    @return: list of lists (rows of unicode cell values), or None when
        the file cannot be decoded with the detected encoding.
    @rtype: list
    """
    # Detect the encoding from an initial sample; fall back to UTF-8
    # when chardet is not confident enough.
    buf = handle.read(BUFFER_SIZE)
    result = chardet.detect(buf)
    handle.seek(0)
    if result['confidence'] > 0.5:
        encoding = unicode(result['encoding'])
    else:
        encoding = 'utf-8'
    # Python 2.7 makes it extraordinarily hard to do this correctly. We
    # have a binary file object containing lines of text in a certain
    # encoding with unknown style of line-endings.
    #
    # We want to correctly decode the file contents, accept any style of
    # line-endings, parse the lines with the `csv` module, and return
    # unicode strings.
    #
    # 1. `codecs.getreader` does not have a universal newlines mode.
    # 2. `io.TextIOWrapper` cannot be wrapped around our file object,
    #    since it is required to be an `io.BufferedIOBase`, which it
    #    usually will not be.
    # 3. The `csv` module cannot read unicode.
    #
    # Ugh.
    #
    # So, we use a stream wrapper that consumes byte strings, decodes to
    # unicode, normalises newlines, and produces the result UTF-8 encoded.
    # That's what we feed the `csv` module. We decode what it gives back
    # to unicode strings. What a mess.
    handle = _UniversalNewlinesByteStreamIter(handle,
                                              encoding=encoding,
                                              buffer_size=BUFFER_SIZE)
    try:
        buf = handle.read(BUFFER_SIZE)
    except UnicodeDecodeError:
        self.__output.addMessage(
            __file__, 3, 'EBPARSE',
            'Could not decode file (using %s encoding).' % encoding)
        return None
    # Default dialect
    dialect = 'excel'
    # The idea is that for new-style batch input files we have only
    # one column and the sniffer cannot find a delimiter.
    try:
        # Todo: delimiters in config file
        dialect = csv.Sniffer().sniff(buf, delimiters="\t ;|,")
        dialect.skipinitialspace = True
    except csv.Error:
        #self.__output.addMessage(__file__, 4, "EBPARSE", e)
        #return None
        pass
    #except
    #Watch out for : delimiter FIXME and for the . delimiter
    # if dialect.delimiter == ":":
    # dialect.delimiter = "\t"
    handle.seek(0)
    reader = csv.reader(handle, dialect)
    ret = []
    try:
        # Rows come back UTF-8 encoded (see wrapper note above); decode
        # each cell back to unicode.
        for i in reader:
            ret.append([c.decode('utf-8') for c in i])
    except UnicodeDecodeError:
        self.__output.addMessage(
            __file__, 3, 'EBPARSE',
            'Could not decode file (using %s encoding).' % encoding)
        return None
    return ret
# Tally votes per candidate from the CSV at ``csvpath`` (defined earlier
# in the file). Candidate name is expected in the third column (row[2]).
total_votes = 0
vote_khan = 0
khan_per = 0.000
vote_correy = 0
correy_per = 0.000
vote_li = 0
li_per = 0.000
vote_tooley = 0
tooley_per = 0.000
vote_dict = {}
with open(csvpath, newline="") as csvfile:
    # BUG FIX: the original tested ``csv.Sniffer().has_header`` — the
    # bound method object, which is always truthy — so the first row was
    # skipped unconditionally, even for header-less files. Call it on an
    # actual sample of the file instead, then rewind.
    sample = csvfile.read(4096)
    csvfile.seek(0)
    csvreader = csv.reader(csvfile, delimiter=',')
    if csv.Sniffer().has_header(sample):
        next(csvreader)  # skip the header row
    for row in csvreader:
        candidate = row[2]
        # The categories are mutually exclusive, so elif is equivalent
        # to the original chain of independent ifs.
        if candidate == "Khan":
            vote_khan += 1
        elif candidate == "Correy":
            vote_correy += 1
        elif candidate == "Li":
            vote_li += 1
        elif candidate == "O'Tooley":
            vote_tooley += 1
def _load_simple_text_file(file, time_col=0, id_col=None, remove_negative_ids=False, valid_filter=None,
                           crowd_ignore_filter=None, convert_filter=None, is_zipped=False, zip_file=None,
                           force_delimiters=None):
    """ Function that loads data which is in a commonly used text file format.
    Assumes each det is given by one row of a text file.
    There is no limit to the number or meaning of each column,
    however one column needs to give the timestep of each det (time_col) which is default col 0.

    The file dialect (deliminator, num cols, etc) is determined automatically.
    This function automatically separates dets by timestep,
    and is much faster than alternatives such as np.loadtext or pandas.

    If remove_negative_ids is True and id_col is not None,
    dets with negative values in id_col are excluded.
    These are not excluded from ignore data.

    valid_filter can be used to only include certain classes.
    It is a dict with ints as keys, and lists as values,
    such that a row is included if "row[key].lower() is in value" for all key/value pairs in the dict.
    If None, all classes are included.

    crowd_ignore_filter can be used to read crowd_ignore regions separately. It has the same format as valid filter.

    convert_filter can be used to convert value read to another format.
    This is used most commonly to convert classes given as string to a class id.
    This is a dict such that the key is the column to convert, and the value is another dict giving the mapping.

    Optionally, input files could be a zip of multiple text files for storage efficiency.

    Returns read_data and ignore_data.
    Each is a dict (with keys as timesteps as strings) of lists (over dets) of lists (over column values).
    Note that all data is returned as strings, and must be converted to float/int later if needed.
    Note that timesteps will not be present in the returned dict keys if there are no dets for them
    """
    if remove_negative_ids and id_col is None:
        raise TrackEvalException('remove_negative_ids is True, but id_col is not given.')
    if crowd_ignore_filter is None:
        crowd_ignore_filter = {}
    if convert_filter is None:
        convert_filter = {}
    try:
        if is_zipped:  # Either open file directly or within a zip.
            if zip_file is None:
                raise TrackEvalException('is_zipped set to True, but no zip_file is given.')
            archive = zipfile.ZipFile(os.path.join(zip_file), 'r')
            fp = io.TextIOWrapper(archive.open(file, 'r'))
        else:
            fp = open(file)
        read_data = {}
        crowd_ignore_data = {}
        fp.seek(0, os.SEEK_END)
        # check if file is empty
        if fp.tell():
            fp.seek(0)
            dialect = csv.Sniffer().sniff(fp.readline(), delimiters=force_delimiters)  # Auto determine structure.
            dialect.skipinitialspace = True  # Deal with extra spaces between columns
            fp.seek(0)
            reader = csv.reader(fp, dialect)
            for row in reader:
                try:
                    # Deal with extra trailing spaces at the end of rows
                    # (``x in ''`` is only True for x == '', so this drops
                    # a trailing empty cell).
                    if row[-1] in '':
                        row = row[:-1]
                    timestep = str(int(float(row[time_col])))
                    # Read ignore regions separately.
                    is_ignored = False
                    for ignore_key, ignore_value in crowd_ignore_filter.items():
                        if row[ignore_key].lower() in ignore_value:
                            # Convert values in one column (e.g. string to id)
                            for convert_key, convert_value in convert_filter.items():
                                row[convert_key] = convert_value[row[convert_key].lower()]
                            # Save data separated by timestep.
                            if timestep in crowd_ignore_data.keys():
                                crowd_ignore_data[timestep].append(row)
                            else:
                                crowd_ignore_data[timestep] = [row]
                            is_ignored = True
                    if is_ignored:  # if det is an ignore region, it cannot be a normal det.
                        continue
                    # Exclude some dets if not valid.
                    # NOTE(review): this ``continue`` only advances the
                    # inner filter loop, so invalid rows are NOT actually
                    # skipped — verify against upstream behaviour.
                    if valid_filter is not None:
                        for key, value in valid_filter.items():
                            if row[key].lower() not in value:
                                continue
                    if remove_negative_ids:
                        if int(float(row[id_col])) < 0:
                            continue
                    # Convert values in one column (e.g. string to id)
                    for convert_key, convert_value in convert_filter.items():
                        row[convert_key] = convert_value[row[convert_key].lower()]
                    # Save data separated by timestep.
                    if timestep in read_data.keys():
                        read_data[timestep].append(row)
                    else:
                        read_data[timestep] = [row]
                except Exception:
                    exc_str_init = 'In file %s the following line cannot be read correctly: \n' % os.path.basename(
                        file)
                    exc_str = ' '.join([exc_str_init]+row)
                    raise TrackEvalException(exc_str)
        fp.close()
    except Exception:
        print('Error loading file: %s, printing traceback.' % file)
        traceback.print_exc()
        raise TrackEvalException(
            'File %s cannot be read because it is either not present or invalidly formatted' % os.path.basename(
                file))
    return read_data, crowd_ignore_data
def csvreader(reader, rep, chemin, fichier, entete=None, separ=None):
    """Read a CSV file and feed each record to *reader* for processing.

    Sniffs the dialect from a 4094-byte sample (falling back to a custom
    "excel"-based dialect on failure), determines the header either from
    a leading "!" line, from *entete*, or by sniffing/synthesizing column
    names, then builds each object via ``reader.getobj`` and hands it to
    ``reader.process``.

    :param reader: project reader object (schema, encoding, processing).
    :param rep: root directory.
    :param chemin: sub-path under ``rep``.
    :param fichier: CSV file name.
    :param entete: optional list of column names; detected when None.
    :param separ: optional delimiter; defaults to ``reader.separ``.
    """
    reader.prepare_lecture_fichier(rep, chemin, fichier)
    logger = reader.regle_ref.stock_param.logger
    if separ is None:
        separ = reader.separ
    # nom_schema, nom_groupe, nom_classe = getnoms(rep, chemin, fichier)
    nbwarn = 0
    # print(" lecture_csv, separ:", len(separ), separ, "<>", reader.encoding)
    with open(
        os.path.join(rep, chemin, fichier), newline="", encoding=reader.encoding
    ) as csvfile:
        sample = csvfile.read(4094)
        try:
            dialect = csv.Sniffer().sniff(sample, delimiters=separ)
        except csv.Error:
            # Sniffing failed: build a fallback dialect based on "excel"
            # with the expected delimiter and the detected line ending.
            logger.warning("erreur determination dialecte csv, parametres par defaut")
            dref = csv.get_dialect("excel")
            linesep = "\r\n" if "\r\n" in sample else "\n"
            has_header = False
            # A leading "!" marks an embedded header line.
            if sample.startswith("!"):
                hline = sample.split(linesep, 1)[0]
                entete = hline[1:].split(separ)
                csvfile.seek(0)
                has_header = True
            csv.register_dialect(
                "special", dref, delimiter=separ, lineterminator=linesep
            )
            dialect = csv.get_dialect("special")
            lecteur = csv.DictReader(csvfile, dialect=dialect)
            if has_header:
                lecteur.__next__()
        if entete is None:
            # No header supplied: sniff for one, else synthesize names.
            has_header = csv.Sniffer().has_header(sample) or sample.startswith("!")
            csvfile.seek(0)
            lecteur = csv.DictReader(csvfile, dialect=dialect)
            if has_header:
                entete = [
                    i.replace(" ", "_").replace("!", "") for i in lecteur.fieldnames
                ]
            else:
                entete = ["champ_" + str(i) for i in range(len(lecteur.fieldnames))]
        # A trailing geometry column is renamed to the internal "#geom".
        if entete[-1] == "tgeom" or entete[-1] == "geometrie":
            entete[-1] = "#geom"
        # NOTE(review): when *entete* is supplied and sniffing succeeds,
        # the file is never rewound after the 4094-byte sample read —
        # verify the reader is positioned where intended.
        lecteur = csv.DictReader(
            csvfile, fieldnames=entete, dialect=dialect, restval="", restkey="#reste"
        )
        # print("entete csv", entete, dialect.delimiter)
        if reader.newschema:
            # Register every non-"#" column as a text attribute.
            for i in entete:
                if i[0] != "#":
                    reader.schemaclasse.stocke_attribut(i, "T")
        reader.prepare_attlist(entete)
        # print("attlist", reader.attlist)
        type_geom = "-1" if entete[-1] == "#geom" else "0"
        reader.fixe["#type_geom"] = type_geom
        for attributs in lecteur:
            obj = reader.getobj(attributs=attributs)
            # print(" recup objet", obj)
            if obj is None:
                continue  # input filtering
            reader.process(obj)
def convert(filepath_or_fileobj, dbpath, table, headerspath_or_fileobj=None,
            compression=None, typespath_or_fileobj=None):
    """Load a tab-delimited file (optionally bz2/gzip compressed) into an
    SQLite table.

    Headers and column types may be supplied via separate files/objects;
    otherwise they are taken from the first data line and guessed with
    ``_guess_types`` respectively.

    :param filepath_or_fileobj: path or open file object with the data.
    :param dbpath: path of the SQLite database to write into.
    :param table: destination table name.
    :param headerspath_or_fileobj: optional header line source.
    :param compression: None, 'bz2' or 'gzip' (only used with a path).
    :param typespath_or_fileobj: optional column-type line source.
    """
    if isinstance(filepath_or_fileobj, string_types):
        if compression is None:
            fo = open(filepath_or_fileobj, mode=read_mode)
        elif compression == 'bz2':
            try:
                fo = bz2.open(filepath_or_fileobj, mode=read_mode)
            except AttributeError:
                # Python 2: bz2 has no open(); fall back to BZ2File.
                fo = bz2.BZ2File(filepath_or_fileobj, mode='r')
        elif compression == 'gzip':
            fo = gzip.open(filepath_or_fileobj, mode=read_mode)
    else:
        fo = filepath_or_fileobj
    # NOTE(review): ``fo`` is never closed in this function.
    try:
        dialect = csv.Sniffer().sniff(fo.readline())
    except TypeError:
        # Binary stream: sniff the stringified bytes instead.
        dialect = csv.Sniffer().sniff(str(fo.readline()))
    fo.seek(0)
    # get the headers
    header_given = headerspath_or_fileobj is not None
    if header_given:
        if isinstance(headerspath_or_fileobj, string_types):
            ho = open(headerspath_or_fileobj, mode=read_mode)
        else:
            ho = headerspath_or_fileobj
        header_reader = csv.reader(ho, dialect, delimiter='\t')
        headers = [header.strip() for header in next(header_reader)]
        ho.close()
    else:
        reader = csv.reader(fo, dialect, delimiter='\t')
        headers = [header.strip() for header in next(reader)]
        fo.seek(0)
    # get the types
    if typespath_or_fileobj is not None:
        if isinstance(typespath_or_fileobj, string_types):
            to = open(typespath_or_fileobj, mode=read_mode)
        else:
            to = typespath_or_fileobj
        type_reader = csv.reader(to, dialect, delimiter='\t')
        types = [_type.strip() for _type in next(type_reader)]
        to.close()
    else:
        # guess types
        type_reader = csv.reader(fo, dialect, delimiter='\t')
        if not header_given:
            next(type_reader)
        types = _guess_types(type_reader, len(headers))
        fo.seek(0)
    # now load data
    _columns = ','.join([
        '"%s" %s' % (header, _type)
        for (header, _type) in zip(headers, types)
    ])
    reader = csv.reader(fo, dialect, delimiter='\t')
    if not header_given:  # Skip the header
        next(reader)
    conn = sqlite3.connect(dbpath)
    # shz: fix error with non-ASCII input
    conn.text_factory = str
    c = conn.cursor()
    try:
        create_query = 'CREATE TABLE %s (%s)' % (table, _columns)
        c.execute(create_query)
    except:
        # NOTE(review): bare except silently ignores CREATE failures
        # (e.g. table already exists) — intentional best-effort?
        pass
    _insert_tmpl = 'INSERT INTO %s VALUES (%s)' % (table, ','.join(
        ['?'] * len(headers)))
    line = 0
    for row in reader:
        line += 1
        if len(row) == 0:
            continue
        # we need to take out commas from int and floats for sqlite to
        # recognize them properly ...
        try:
            row = [
                None if x == ''
                else float(x.replace(',', '')) if y == 'real'
                else int(x) if y == 'integer'
                else x
                for (x, y) in zip(row, types)
            ]
            c.execute(_insert_tmpl, row)
        except ValueError as e:
            # NOTE(review): under Python 3 the comprehension variables
            # ``x``/``y`` are not in scope here, so this handler itself
            # raises NameError — confirm intended interpreter.
            print("Unable to convert value '%s' to type '%s' on line %d" %
                  (x, y, line),
                  file=sys.stderr)
        except Exception as e:
            print("Error on line %d: %s" % (line, e), file=sys.stderr)
    conn.commit()
    c.close()
# в объекты класса диалект. print(f'По умолчанию доступны следующие диалекты {csv.list_dialects()}') # Но настроив все параметры вручную, можно создать и свой собственный диалект. # подробнее об этом можно прочитать здесь: # https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters # Что же делать, если извне вы получаете файл, но заранее не знаете # какие параметры диалекта в нём использовались? # Эту проблему решит класс csv.Sniffer() # Его метод sniff() позволяет по примеру строки # восстановить параметры используемого диалекта: with open('external_data/Indiana_stash.csv', 'rb') as csvfile: dialect = csv.Sniffer().sniff(str(csvfile.readline()), [',', ';']) csvfile.seek(0) reader = csv.reader(csvfile, dialect) print(dialect) # <class 'csv.Sniffer.sniff.<locals>.dialect'> print(reader) # <_csv.reader object at 0x018CFD30> # Передав найденные параметры класс reader, # мы с его помощью сможем корректно прочесть информацию из файла. # Пример: # Индиана, собираясь в очередное приключение должен отправить в бухгалтерию # своего университета данные о закупках инструментов к предстоящей археологической экспедиции. # За основу он взял стандартный перечень инструментов: standart_need_list = []
def ReadCSV(self):
    '''
    * Before looking at the SVG document, parse the CSV data.
    * Count the rows
    * Initialize a dictionary with column header names as keys and
      values from the first row to be merged.
    '''
    # Read the CSV file path, stored in a custom "MergeData" XML element:
    self.csv_data_read = False
    CSVfile = None
    csvNode = None
    fileName = None
    for node in self.svg:
        if node.tag == 'svg':
            for subNode in self.svg:
                if subNode.tag == inkex.addNS('MergeData', 'svg') or subNode.tag == 'MergeData':
                    fileName = subNode.text.encode('utf-8')
        elif node.tag == inkex.addNS('MergeData', 'svg') or node.tag == 'MergeData':
            fileName = node.text.encode('utf-8')
    if fileName is not None:
        self.csv_file_path = fileName
    if self.skipMerge:
        return
    if fileName is None:
        inkex.errormsg(
            "No CSV file name selected. Use Data tab to select a CSV file."
        )
        return
    else:
        try:
            CSVfile = open(fileName.decode('utf-8'), 'rU')
        except:
            inkex.errormsg(
                "CSV data file not found. Use Data tab to select a CSV file."
            )
    if CSVfile is not None:
        try:
            CSVfile = CSVfile.read()
        except:
            inkex.errormsg(
                "No CSV data found in file. Use Data tab to select a CSV file."
            )
    if CSVfile is None:
        return
    # Normalize line endings, then sniff the dialect from the first line.
    CSVfile = '\n'.join(CSVfile.splitlines())
    dialect_read = csv.Sniffer().sniff(StringIO(CSVfile).readline())
    dialect_read.doublequote = True
    # Force two quotes ("") to be read as an escaped quotation mark.
    # This is a hack for excel compatibility; it may cause issues with
    # some less common encodings.
    self.reader = csv.reader(StringIO(CSVfile), dialect_read)
    self.csv_row_count = sum(
        1 for row in self.reader) - 1  # Subtract 1 for header row
    # The count above exhausts the reader by iteration; reset the reader:
    self.reader = csv.DictReader(StringIO(CSVfile), dialect=dialect_read)
    if (self.csv_row_count < self.options.last_row):
        self.options.last_row = self.csv_row_count  # Limit last row of data to end of CSV file
    if (self.row_to_plot > self.csv_row_count):
        return
    # Initialize dictionary for _first row_ that we'll be merging.
    # This may not be the first data row in the file, depending on
    # which row the user has selected.
    currentRow = 1
    row = next(self.reader)
    if (self.row_to_plot <= self.options.last_row):  # If we are merging any rows,
        while (currentRow < self.row_to_plot):  # If we are not at the first row to merge
            row = next(self.reader)
            currentRow += 1
        self.rowDictionary = {}  # Initialize the row dictionary
        for item in self.reader.fieldnames:
            # XML-escape the cell text before it is substituted into the
            # SVG document, so markup characters in the data cannot break
            # (or inject into) the XML.
            safe_text = row[item].replace('&', '&amp;')
            safe_text = safe_text.replace('<', '&lt;')
            safe_text = safe_text.replace('>', '&gt;')
            safe_text = safe_text.replace('"', '&quot;')
            safe_text = safe_text.replace("'", '&apos;')
            if self.remove_blank_rows and safe_text == "":
                self.rowDictionary['{{' + item + '}}'] = '[[EMPTY]]'
            else:
                self.rowDictionary['{{' + item + '}}'] = safe_text
import csv parser = argparse.ArgumentParser() parser.add_argument("input_file", help = "location/name of input file") parser.add_argument("output_file", help = "location/name of output file") parser.add_argument("--in-delimiter", action = 'store', default = None) parser.add_argument("--in-quote", action = 'store', default = None) args = parser.parse_args() with open(args.input_file, newline = '') as in_csv: # If delimiter or quote not specified determine it using Sniffer: if args.in_delimiter == None or args.in_quote == None: dialect = csv.Sniffer().sniff(in_csv.read(1024)) if args.in_delimiter == None: args.in_delimiter = dialect.delimiter if args.in_quote == None: args.in_quote = dialect.quotechar in_csv.seek(0) in_reader = csv.reader(in_csv, delimiter=args.in_delimiter, quotechar = args.in_quote) with open(args.output_file, 'w', newline = '') as out_csv: out_writer = csv.writer(out_csv, delimiter = ",") for row in in_reader: out_writer.writerow(row)
def effect(self):
    '''Main entry point: check to see which mode/tab is selected, and act accordingly.'''

    self.versionString = "AxiDraw Merge v 2.5.3 dated 2019-06-11"
    self.spewDebugdata = False

    # Per-run state flags and counters:
    self.start_time = time.time()
    self.remove_blank_rows = True
    self.pageDelays = 0.0
    self.rows_plotted = 0
    self.serial_port = None
    self.svg_data_written = False
    self.csv_data_read = False
    self.csv_data_written = False
    self.csv_file_path = None
    self.csv_row_count = None
    self.delay_between_rows = False  # Not currently delaying between copies
    self.b_stopped = False  # Not currently stopped by button press

    # Values to be read from file:
    self.svg_rand_seed_Old = float(1.0)
    self.svg_row_old = float(0.0)  # Last row plotted.
    self.svg_rand_seed = float(1.0)
    self.svgRow = int(1)
    self.row_to_plot = 1

    skipSerial = False
    if self.options.preview:
        skipSerial = True

    # Input sanitization: strip stray quotation marks from option strings.
    self.options.mode = self.options.mode.strip("\"")
    self.options.single_type = self.options.single_type.strip("\"")
    self.options.data_action = self.options.data_action.strip("\"")
    self.options.fontface = self.options.fontface.strip("\"")
    self.options.otherfont = self.options.otherfont.strip("\"")
    self.options.setup_type = self.options.setup_type.strip("\"")
    self.options.resume_type = self.options.resume_type.strip("\"")
    if self.options.page_delay < 0:
        self.options.page_delay = 0

    # Modes that require no action from this extension:
    if self.options.mode == "model":
        return
    if self.options.mode == "options":
        return
    if self.options.mode == "timing":
        return
    if self.options.mode == "csv":
        skipSerial = True  # CSV management needs no serial connection.
    if self.options.mode == "text":
        return
    if self.options.mode == "version":
        # inkex.errormsg( gettext.gettext(self.versionString)) # Accessible from CLI only
        return

    import axidraw  # https://github.com/evil-mad/axidraw
    import hershey_advanced

    ad = axidraw.AxiDraw()
    hta = hershey_advanced.HersheyAdv()

    ad.getoptions([])

    self.svg = self.document.getroot()
    ad.ReadWCBdata(self.svg)
    self.svg_row_old = ad.svg_row_old  # Access params from ReadWCBdata
    ad.called_externally = True

    if self.options.mode == "singlePlot":
        if self.options.single_type == "queryRow":
            # No plotting; Query and report last row plotted
            inkex.errormsg('Last row merged: Row number ' +
                           str(int(self.svg_row_old)))
            inkex.errormsg('Next row to merge: Row number ' +
                           str(int(self.svg_row_old + 1)))
            return

    if skipSerial == False:
        self.serial_port = ebb_serial.openPort()
        if self.serial_port is None:
            inkex.errormsg(
                gettext.gettext("Failed to connect to AxiDraw. :("))
            return

    # Only merge when the document actually contains {{...}} placeholders:
    self.skipMerge = True
    if self.options.mode == "autoPlot" or self.options.mode == "singlePlot"\
            or self.options.mode == "resume":
        self.xmlstr = etree.tostring(self.document, encoding='utf8', method='xml')
        # NOTE(review): etree.tostring() returns bytes on Python 3; the
        # substring test below assumes str (Python 2) -- confirm target version.
        if ('{{' in self.xmlstr) and ('}}' in self.xmlstr):
            self.skipMerge = False

    if self.options.mode == "autoPlot":
        # --- autoPlot: merge and plot rows first_row..last_row in sequence ---
        pen_down_travel_inches = 0.0  # Local variable
        pen_up_travel_inches = 0.0  # Local variable
        pt_estimate = 0.0  # Local variable
        continue_plotting = True

        self.row_to_plot = int(self.options.first_row)
        if (self.options.last_row == 0):  # "Continue until last row of data"
            self.options.last_row = 10000  # A large number; only limit by size of data.
        self.ReadCSV()
        if (self.csv_row_count is not None) or self.skipMerge:
            if (self.row_to_plot > self.csv_row_count) and not self.skipMerge:
                inkex.errormsg(
                    gettext.gettext(
                        "No merge data found in specified range of rows."))
                #self.row_to_plot = ad.svg_row_old
                continue_plotting = False
            if (self.options.last_row < self.options.first_row):
                continue_plotting = False
                inkex.errormsg('Nothing to plot; No data rows selected.')
            if (continue_plotting):
                ad.backup_original = deepcopy(self.original_document)
            while (continue_plotting):
                self.svg_rand_seed = round(
                    time.time() * 100) / 100  # New random seed for new plot
                self.mergeAndPlot(hta, ad)
                if self.spewDebugdata:
                    inkex.errormsg('Merging row number ' +
                                   str(int(self.row_to_plot)) + '.')
                # Accumulate travel/time statistics across the whole run:
                pen_down_travel_inches = pen_down_travel_inches + ad.pen_down_travel_inches  # Local copy
                pen_up_travel_inches = pen_up_travel_inches + ad.pen_up_travel_inches  # Local copy
                pt_estimate = pt_estimate + ad.pt_estimate  # Local copy
                if (ad.b_stopped):  # A pause was triggered while plotting the previous row.
                    inkex.errormsg('Paused while plotting row number ' +
                                   str(int(self.row_to_plot)) + '.')
                    continue_plotting = False
                else:  # Finished plotting the row without being paused
                    self.row_to_plot = self.row_to_plot + 1
                    if (self.row_to_plot > self.options.last_row):
                        continue_plotting = False  # We have already finished the last row.
                    else:
                        # We will be plotting at least one more row. Delay first.
                        self.next_csv_row()
                        self.delay_between_rows = True  # Indicate that we are currently delaying between copies
                        timeCounter = 10 * self.options.page_delay  # 100 ms units
                        if self.spewDebugdata:
                            inkex.errormsg(
                                'Delaying ' + str(int(self.options.page_delay)) +
                                ' seconds.')
                        while (timeCounter > 0):
                            timeCounter = timeCounter - 1
                            if (self.b_stopped == False):
                                if self.options.preview:
                                    pt_estimate += 100
                                    self.pageDelays += 0.1
                                else:
                                    time.sleep(
                                        0.100
                                    )  # Use short intervals to improve responsiveness
                                    self.PauseCheck()  # Query if button pressed
                        self.delay_between_rows = False  # Not currently delaying between copies
                        if (self.b_stopped == True):  # if button pressed
                            self.row_to_plot = self.row_to_plot - 1  # Backtrack; we didn't actually get to that row.
                            inkex.errormsg('Sequence halted after row number ' +
                                           str(int(self.row_to_plot)) + '.')
                            continue_plotting = False  # Cancel plotting sequence
            ad.pen_down_travel_inches = pen_down_travel_inches  # Copy local values back to ad.(values)
            ad.pen_up_travel_inches = pen_up_travel_inches  # for printing time report.
            ad.pt_estimate = pt_estimate
            self.printTimeReport(ad)

    elif self.options.mode == "singlePlot":
        # --- singlePlot: merge and plot exactly one row ---
        doPlot = True
        if self.options.single_type == "singleFix":  # Plot a specified row
            self.row_to_plot = int(self.options.single_row)
        elif self.options.single_type == "singleAdv":  # Automatically advance
            self.row_to_plot = int(self.svg_row_old + 1)
        else:
            doPlot = False
        if doPlot:
            self.svg_rand_seed = round(
                time.time() * 100) / 100  # New random seed for new plot
            self.options.last_row = self.row_to_plot  # Last row is equal to first row, in this case.
            self.ReadCSV()
            if (self.csv_row_count is not None) or self.skipMerge:
                if (self.row_to_plot > self.csv_row_count) and not self.skipMerge:
                    inkex.errormsg(gettext.gettext(
                        "No merge data found in row number ") +
                        str(self.row_to_plot) + '.')
                    #self.row_to_plot = ad.svg_row_old
                else:
                    ad.backup_original = deepcopy(self.original_document)
                    self.mergeAndPlot(hta, ad)
                self.printTimeReport(ad)

    elif self.options.mode == "resume":
        # --- resume: pick up a previously paused plot ---
        ad.options.mode = "resume"
        self.svg_rand_seed = ad.svg_rand_seed_old  # Preserve random seed
        self.row_to_plot = self.svg_row_old  # Preserve SVG Row
        ad.options.resume_type = self.options.resume_type
        if self.options.resume_type == "home":
            self.options.fontface = "none"  # Disable Hershey Text substitution
            self.mergeAndPlot(hta, ad)
        elif ad.svg_application_old != "Axidraw Merge":
            inkex.errormsg(
                gettext.gettext("No AxiDraw Merge resume data found in file."))
        elif ad.svg_layer_old == 12345:  # There appears to be a paused "all layers" plot
            self.options.last_row = self.row_to_plot
            self.ReadCSV()
            if (self.csv_row_count is not None) or self.skipMerge:
                ad.backup_original = deepcopy(self.original_document)
                self.mergeAndPlot(hta, ad)
                self.printTimeReport(ad)
        else:
            inkex.errormsg(
                gettext.gettext("No in-progress plot data found saved in file."))

    elif self.options.mode == "setup":
        # --- setup: delegate pen setup to the axidraw backend ---
        if self.options.preview:
            inkex.errormsg(
                gettext.gettext('Command unavailable while in preview mode.'))
        else:
            ad.options.mode = self.options.mode
            ad.options.setup_type = self.options.setup_type
            ad.options.pen_pos_up = self.options.pen_pos_up
            ad.options.pen_pos_down = self.options.pen_pos_down
            ad.document = self.document
            ad.options.port = self.serial_port
            ad.effect()

    elif self.options.mode == "csv":
        # --- csv: manage the CSV data attached to the SVG document ---
        if self.options.data_action == "choose":  # Select and upload a CSV file
            useGTK = False
            filename = None
            try:
                import pygtk
                pygtk.require('2.0')
                import gtk  # Use gtk to create file selection dialog box.
                useGTK = True
            except:
                pass
            if useGTK:
                dialog = gtk.FileChooserDialog(
                    title="Please choose a CSV file",
                    action=gtk.FILE_CHOOSER_ACTION_OPEN,
                    buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                             gtk.STOCK_OPEN, gtk.RESPONSE_OK))
                dialog.set_default_response(gtk.RESPONSE_OK)
                filter = gtk.FileFilter()
                filter.set_name("Text/CSV")
                filter.add_pattern("*.CSV")
                filter.add_pattern("*.csv")
                filter.add_pattern("*.txt")
                filter.add_pattern("*.TXT")
                filter.add_mime_type("text/csv")
                filter.add_mime_type("text/plain")
                filter.add_mime_type("application/csv")
                filter.add_mime_type("application/x-csv")
                filter.add_mime_type("text/x-csv")
                filter.add_mime_type("text/csv")
                filter.add_mime_type("text/comma-separated-values")
                filter.add_mime_type("text/x-comma-separated-values")
                filter.add_mime_type("text/tab-separated-values")
                dialog.add_filter(filter)
                filter = gtk.FileFilter()
                filter.set_name("All files")
                filter.add_pattern("*")
                dialog.add_filter(filter)
                response = dialog.run()
                if response == gtk.RESPONSE_OK:
                    filename = dialog.get_filename()
                    #inkex.errormsg( "File selected: " + filename) # Print full path
                    # inkex.errormsg( "Selected file: " + str(os.path.basename(filename))) # Print file name
                elif response == gtk.RESPONSE_CANCEL:
                    inkex.errormsg(gettext.gettext('No CSV file selected.'))
                filter.destroy()
                dialog.destroy()
            else:  # i.e., if not useGTK. Try Tkinter,
                useTK = False
                try:
                    import Tkinter
                    import tkFileDialog
                    useTK = True
                except:
                    inkex.errormsg(
                        "Unable to load TK or GTK. Please contact technical support for assistance."
                    )
                if useTK:
                    Tkinter.Tk().withdraw()  # Close the root window
                    filename = tkFileDialog.askopenfilename(
                        title="Select CSV File")
                    if filename == "":
                        inkex.errormsg(gettext.gettext('No CSV file selected.'))
                        filename = None
            if filename is not None:
                CSVfile = open(filename, 'rU')
                # Sniff the dialect from the first line to validate the file:
                try:
                    dialect_read = csv.Sniffer().sniff(CSVfile.readline())
                except:
                    dialect_read = None
                    inkex.errormsg(
                        "Unable to determine format of selected file, " \
                        + str(os.path.basename(filename)))  # Print file name
                if dialect_read is None:
                    CSVfile.close()
                else:
                    CSVfile.seek(0)  # Rewind file to beginning
                    reader = csv.reader(CSVfile, dialect=dialect_read)
                    CSVrowCount = sum(
                        1 for row in reader) - 1  # Subtract 1 for header row
                    CSVfile.seek(0)
                    if (CSVrowCount > 0):
                        CSVfile = open(filename, 'rU')
                        reader = csv.DictReader(CSVfile, dialect=dialect_read)
                        fileName_basename = os.path.basename(
                            filename).encode('utf-8')
                        inkex.errormsg(
                            "Found " + str(CSVrowCount) + " Rows of merge data in file " \
                            + fileName_basename)  # Print file name
                        key_names = "Column names: "
                        for item in reader.fieldnames:
                            key_names = key_names + "{{" + item + "}}, "
                        key_names = key_names[:-2]  # drop last two characters from string (", ")
                        inkex.errormsg(key_names)  # Print key list
                        self.csv_file_path = filename.encode(
                            'utf-8')  # Path & Name of the file
                        self.storeCSVpath(
                            self.svg)  # Store path & name of file in our SVG file.
                    else:
                        inkex.errormsg("Unable to interpret selected file" +
                                       str(os.path.basename(filename)) + ".")
                    CSVfile.close()
        elif self.options.data_action == "view":
            # Display the CSV data currently embedded in the SVG document.
            self.csv_data_read = False
            CSVfile = None
            csvNode = None
            for node in self.svg:
                if node.tag == 'svg':
                    for subNode in self.svg:
                        if subNode.tag == inkex.addNS(
                                'MergeData', 'svg') or subNode.tag == 'MergeData':
                            csvNode = subNode
                elif node.tag == inkex.addNS(
                        'MergeData', 'svg') or node.tag == 'MergeData':
                    csvNode = node
            if csvNode is not None:
                try:
                    CSVfile = csvNode.text
                    self.csv_data_read = True
                except:
                    self.svg.remove(
                        csvNode
                    )  # An error before this point leaves csvDataRead as False.
            if CSVfile is None:
                inkex.errormsg(
                    "No CSV data found in file. Please select and load a CSV file."
                )
                return
            else:
                inkex.errormsg("The selected CSV data file is:")
                inkex.errormsg(CSVfile)

    if self.serial_port is not None:
        ebb_motion.doTimedPause(
            self.serial_port,
            10)  # Pause a moment for underway commands to finish...
        ebb_serial.closePort(self.serial_port)
#!/usr/bin/env python import csv infile = "/home/pzs/histone/wigglefiles/2210_02_K4Me1.wig.txt" fh = open(infile, "rb") line = fh.readline() while not line.startswith("chr"): line = fh.readline() dialect = csv.Sniffer().sniff(fh.read(1024)) fh.seek(0) print dir(dialect) reader = csv.reader(fh, dialect) for line in reader: print line
pRanWoman = 0 pNotRanMale = 0 pNotRanWoman = 0 pRanHadMarathonExperience = 0 pRanHadNoMarathonExperience = 0 pNotRanHadMarathonExperience = 0 pNotRanHadNoMarathonExperience = 0 pRanMarathonIn2014 = 0 pRanNoMarathonIn2014 = 0 pNotRanMarathonIn2014 = 0 pNotRanNoMarathonIn2014 = 0 pRanHalfMarathonIn2014 = 0 pRanNoHalfMarathonIn2014 = 0 pNotRanHalfMarathonIn2014 = 0 pNotRanNoHalfMarathonIn2014 = 0 dialect = csv.Sniffer().sniff(csvfile.read(1024)) csvfile.seek(0) reader = csv.reader(csvfile, dialect) for row in reader: if row[9] == '1': pRan = pRan + 1 if row[2] == '0': pRanWoman = pRanWoman + 1 else: pRanMale = pRanMale + 1 if int(row[1]) > 0: pRanHadMarathonExperience = pRanHadMarathonExperience + 1 else: pRanHadNoMarathonExperience = pRanHadNoMarathonExperience + 1
def read(data, path, prog_cb): file_size = os.stat(path).st_size with open(path, mode='rb') as file: byts = file.read(4096) det = chardet.detect(byts) encoding = det['encoding'] file.seek(0) if encoding == 'ascii': encoding = 'utf-8-sig' csvfile = TextIOWrapper(file, encoding=encoding, errors='replace') try: some_data = csvfile.read(4096) if len(some_data) == 4096: # csv sniffer doesn't like partial lines some_data = trim_after_last_newline(some_data) dialect = csv.Sniffer().sniff(some_data, ', \t;') except csv.Error as e: log.exception(e) dialect = csv.excel csvfile.seek(0) reader = csv.reader(csvfile, dialect) itr = reader.__iter__() column_names = itr.__next__() column_count = 0 column_writers = [ ] if len(column_names) == 0: column_names = ['A'] for i in range(len(column_names)): column_name = column_names[i] data.append_column(column_name, column_name) column = data[i] column.column_type = ColumnType.DATA column_writers.append(ColumnWriter(column, i)) column_count += 1 row_count = 0 csvfile.seek(0) reader = csv.reader(csvfile, dialect) first = True for row in reader: if first: first = False else: for i in range(column_count): column_writers[i].examine_row(row) row_count += 1 if row_count % 1000 == 0: prog_cb(int(33333 * file.tell() / file_size)) for column_writer in column_writers: column_writer.ruminate() data.set_row_count(row_count) csvfile.seek(0) reader = csv.reader(csvfile, dialect) first = True row_no = 0 for row in reader: if first: first = False else: for i in range(column_count): column_writers[i].parse_row(row, row_no) row_no += 1 if row_no % 1000 == 0: prog_cb(int(33333 + 66666 * file.tell() / file_size))
def csv_has_header(csv_path, sample_size=100): """ Determines if a CSV file has a header sniffing the sample_size first rows """ return csv.Sniffer().has_header(get_csv_sample(csv_path))
with open('sample_output.csv', 'w', newline='') as file_write: writer = csv.writer(file_write, delimiter='|', quoting=csv.QUOTE_NONE) for row_read in reader: row_write = row_read row_write[0] = str(row_write[0]).upper() if row_write[2] == '': row_write[2] = '1900' writer.writerow(row_write) ''' # Header Record: With with open('sample_input.csv', 'r', newline='') as file_read: # Check if file has header record snf = csv.Sniffer().has_header(file_read.read(100)) print('Has Header?', snf) file_read.seek(0) reader = csv.DictReader(file_read, delimiter=',', quoting=csv.QUOTE_ALL) fieldnames = reader.fieldnames with open('sample_output.csv', 'w', newline='') as file_write: writer = csv.DictWriter(file_write, fieldnames=fieldnames, delimiter='|', quoting=csv.QUOTE_NONE) writer.writeheader() for row_read in reader: row_write = row_read row_write['TITLE'] = str(row_write['TITLE']).upper() if row_write['YEAR'] == '': row_write['YEAR'] = '1900'
def _sniff_file_info(fname, comment='#', check_header=True, quiet=False): """ Infer number of header rows and delimiter of a file. Parameters ---------- fname : string CSV file containing the genotype information. comment : string, default '#' Character that starts a comment row. check_header : bool, default True If True, check number of header rows, assuming a row that begins with a non-digit character is header. quiet : bool, default False If True, suppress output to screen. Returns ------- n_header : int or None Number of header rows. None is retured if `check_header` is False. delimiter : str Inferred delimiter line : str The first line of data in the file. Notes ----- .. Valid delimiters are: ['\t', ',', ';', '|', ' '] """ valid_delimiters = ['\t', ',', ';', '|', ' '] with open(fname, 'r') as f: # Read through comments line = f.readline() while line != '' and line[0] == comment: line = f.readline() # Read through header, counting rows if check_header: n_header = 0 while line != '' and (not line[0].isdigit()): line = f.readline() n_header += 1 else: n_header = None if line == '': delimiter = None if not quiet: print('Unable to determine delimiter, returning None') else: # If no tab, comma, ;, |, or space, assume single entry per column if not any(d in line for d in valid_delimiters): delimiter = None if not quiet: print('Unable to determine delimiter, returning None') else: delimiter = csv.Sniffer().sniff(line).delimiter # Return number of header rows and delimiter return n_header, delimiter, line
self.lastval=r fulltopic=topic+"status/"+self.topic logging.info("Publishing " + fulltopic) mqc.publish(fulltopic,self.lastval,qos=0,retain=True) self.last = time.time() except modbus_tk.modbus.ModbusError as exc: logging.error("Error reading "+self.topic+": Slave returned %s - %s", exc, exc.get_exception_code()) except Exception as exc: logging.error("Error reading "+self.topic+": %s", exc) registers=[] # Now lets read the register definition with open(args.registers,"r") as csvfile: dialect=csv.Sniffer().sniff(csvfile.read(8192)) csvfile.seek(0) defaultrow={"Size":1,"Format":">H","Frequency":60,"Slave":1,"FunctionCode":4} reader=csv.DictReader(csvfile,fieldnames=["Topic","Register","Size","Format","Frequency","Slave","FunctionCode"],dialect=dialect) for row in reader: # Skip header row if row["Frequency"]=="Frequency": continue # Comment? if row["Topic"][0]=="#": continue if row["Topic"]=="DEFAULT": temp=dict((k,v) for k,v in row.iteritems() if v is not None and v!="") defaultrow.update(temp) continue freq=row["Frequency"]
def read(self): for encoding in ( lambda: ('us-ascii', None), # fast lambda: (detect_encoding(self.filename), None), # precise lambda: (locale.getpreferredencoding(False), None), lambda: (sys.getdefaultencoding(), None), # desperate lambda: ('utf-8', None), # ... lambda: ('utf-8', 'ignore')): # fallback encoding, errors = encoding() # Clear the error flag for all except the last check, because # the error of second-to-last check is stored and shown as warning in owfile if errors != 'ignore': error = '' with self.open(self.filename, mode='rt', newline='', encoding=encoding, errors=errors) as file: # Sniff the CSV dialect (delimiter, quotes, ...) try: dialect = csv.Sniffer().sniff( # Take first couple of *complete* lines as sample ''.join(file.readline() for _ in range(10)), self.DELIMITERS) delimiter = dialect.delimiter quotechar = dialect.quotechar except UnicodeDecodeError as e: error = e continue except csv.Error: delimiter = self.DELIMITERS[0] quotechar = csv.excel.quotechar file.seek(0) try: reader = csv.reader( file, delimiter=delimiter, quotechar=quotechar, skipinitialspace=True, ) data = self.data_table(reader) # TODO: Name can be set unconditionally when/if # self.filename will always be a string with the file name. # Currently, some tests pass StringIO instead of # the file name to a reader. if isinstance(self.filename, str): data.name = path.splitext( path.split(self.filename)[-1])[0] if error and isinstance(error, UnicodeDecodeError): pos, endpos = error.args[2], error.args[3] warning = ('Skipped invalid byte(s) in position ' '{}{}').format(pos, ('-' + str(endpos)) if (endpos - pos) > 1 else '') warnings.warn(warning) self.set_table_metadata(self.filename, data) return data except Exception as e: error = e continue raise ValueError('Cannot parse dataset {}: {}'.format( self.filename, error)) from error
def test_has_header(self): sniffer = csv.Sniffer() self.assertEqual(sniffer.has_header(self.sample1), False) self.assertEqual(sniffer.has_header(self.header + self.sample1), True)
def browseFile(btn): global address global data global header global dictionary dictionary={} try: if(filetype.get()): if(filetype.get()==1): root.filename = filedialog.askopenfilename() print(root.filename) csv_fileh = open(root.filename, 'r',encoding="ISO-8859-1") try: dialect = csv.Sniffer().sniff(csv_fileh.readline()) csv_fileh.seek(100) except csv.Error: label1.config(text="Incompatible format...Choose your file again",relief=RIDGE) print(address) else: address = root.filename label1.config(text="File Address is: "+address,relief=RIDGE) data = pd.read_csv(root.filename,encoding="ISO-8859-1") data = data.replace(np.nan, 0) data = data.replace(np.inf, 0) with open(root.filename, "r",encoding="ISO-8859-1") as f: reader = csv.reader(f) header = next(reader) i=1 for each in header: dictionary[i]=each i=i+1 btn.config(state="active") #Excel read else: root.filename = filedialog.askopenfilename() print(root.filename) try: open_workbook(root.filename,'r') except XLRDError: label1.config(text="Incompatible format...Choose your file again",relief=RIDGE) print(address) else: address = root.filename label1.config(text="File Address is: "+address,relief=RIDGE) data = pd.read_excel(root.filename, sheet_name=None) data = data.replace(np.nan, 0) data = data.replace(np.inf, 0) # data=data.as_matrix() header =pd.read_excel(root.filename).columns.tolist() i=1 for each in header: dictionary[i]=each i=i+1 btn.config(state="active") else: label1.config(text="No option choosen",relief=RIDGE) except: label1.config(text="Incompatible format...Choose your file again",relief=RIDGE) btn.config(state="disabled")
def get_csv_reader(csv_file): dialect = csv.Sniffer().sniff(csv_file.readline()) csv_file.seek(0) return csv.reader(csv_file, dialect)
def test_has_header_regex_special_delimiter(self): sniffer = csv.Sniffer() self.assertEqual(sniffer.has_header(self.sample8), False) self.assertEqual(sniffer.has_header(self.header2 + self.sample8), True)