def _readline(self):
    '''Read a single decoded line from the file.'''
    line = self.file.readline(self.max_line_size)
    if not line:
        return ''
    if line[-1] != '\n' and len(line) >= self.max_line_size:
        raise csv.Error('Line exceeds maximum size of {}'.format(
            self.max_line_size))
    try:
        return line.decode(self.encoding)
    except UnicodeDecodeError:
        raise csv.Error('Encountered invalid Unicode character')
    except AttributeError:
        # The file is in text mode, so the line is already decoded.
        return line
def load(self, stream, stream_columns):
    """Load a custom data source.

    Args:
        stream (io.StringIO): An in-memory stream of a custom data source.
            The format of the stream must be a comma-separated file with a
            header containing the columns defined in RG_COLUMNS.
        stream_columns (list): Expected header columns; falls back to
            RG_COLUMNS when falsy.
    """
    if not stream_columns:
        stream_columns = RG_COLUMNS
    stream_reader = csv.DictReader(stream, delimiter=',')
    header = stream_reader.fieldnames
    if header != stream_columns:
        raise csv.Error(
            'Input must be a comma-separated file with header containing '
            'the following columns - %s.\nFound header - %s.\n'
            'For more help, visit: '
            'https://github.com/thampiman/reverse-geocoder'
            % (','.join(stream_columns), ','.join(header)))
    # Load all the coordinates and locations.
    geo_coords, locations = [], []
    for row in stream_reader:
        geo_coords.append((row['lat'], row['lon']))
        locations.append(row)
    return geo_coords, locations
def read_csv(path, is_column_id, delimiter, quotechar, skip_first_row):
    with open(path) as f:
        reader = csv.reader(f, delimiter=delimiter, quotechar=quotechar)
        if skip_first_row:
            lines = list(reader)[1:]
        else:
            lines = list(reader)
    if len(lines[0]) == 3 and is_column_id:
        column_id = 0
        column_label = 1
        column_text_a = 2
        column_text_b = -1
    elif len(lines[0]) == 3 and not is_column_id:
        column_id = -1
        column_label = 0
        column_text_a = 1
        column_text_b = 2
    elif len(lines[0]) == 4 and is_column_id:
        column_id = 0
        column_label = 1
        column_text_a = 2
        column_text_b = 3
    elif len(lines[0]) == 2 and not is_column_id:
        column_id = -1
        column_label = 0
        column_text_a = 1
        column_text_b = -1
    else:
        raise csv.Error("The CSV file " + path + " is malformed")
    return lines, [column_id, column_label, column_text_a, column_text_b]
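# Illustrative usage sketch for read_csv above (the sample file and its
# contents are assumptions, not from the original source):
import csv
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False) as tmp:
    tmp.write('id,label,text\n')
    tmp.write('1,pos,"a short sentence"\n')
    sample_path = tmp.name

lines, columns = read_csv(sample_path, is_column_id=True, delimiter=',',
                          quotechar='"', skip_first_row=True)
# columns == [0, 1, 2, -1]: id, label and text_a are present; there is no
# text_b column, so its index is -1.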
def sniff(sample, delimiters=None):
    """
    This function mimics the Sniffer.sniff() method from the Python csv
    module, with one exception: it doesn't change the detected quotechar
    to default to '"'. We do this because we want to know the detected
    quote character.
    """
    sniffer = csv.Sniffer()
    quotechar, doublequote, delimiter, skipinitialspace = \
        sniffer._guess_quote_and_delimiter(sample, delimiters)
    if not delimiter:
        delimiter, skipinitialspace = sniffer._guess_delimiter(
            sample, delimiters)
    if not delimiter:
        raise csv.Error("Could not determine delimiter")

    class dialect(csv.Dialect):
        _name = "sniffed"
        lineterminator = "\r\n"  # unused
        quoting = csv.QUOTE_MINIMAL

    dialect.doublequote = doublequote
    dialect.delimiter = delimiter
    dialect.quotechar = quotechar  # See above
    dialect.skipinitialspace = skipinitialspace
    dialect.escapechar = '' if dialect.escapechar is None else dialect.escapechar
    return dialect
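# Illustrative usage sketch for sniff above (the sample data is an
# assumption; expected outputs are what the sniffer regexes should detect):
sample = "name;'age'\n'Alice';30\n'Bob';25\n"
dialect = sniff(sample)
print(dialect.delimiter)   # expected: ';'
print(dialect.quotechar)   # expected: "'" -- preserved rather than
                           # being forced back to the default '"'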
def read_from_csv(self, lexicon_csv_path: str, encoding: str = 'utf-8',
                  **csv_reader_keyword_arguments):
    """
    Append lexicon entries from CSV with columns for lemma, topic and rating.

    If the lemma is missing or starts with '#' the whole row is ignored.
    Leading and trailing white space is ignored. Upper and lower case on
    topics and ratings is ignored.

    :param lexicon_csv_path: path to the CSV file to read
    :param encoding: encoding of the CSV file
    :param csv_reader_keyword_arguments: additional options for the CSV
        reader, for example ``delimiter=','``
    """
    assert lexicon_csv_path is not None

    with open(lexicon_csv_path, encoding=encoding, newline='') as csv_file:
        lexicon_reader = csv.reader(csv_file, **csv_reader_keyword_arguments)
        for row in lexicon_reader:
            row = [item.strip() for item in row]
            row += 3 * ['']  # Ensure we have at least 4 strings.
            try:
                self._append_lexicon_entry_from_row(row)
            except ValueError as error:
                raise csv.Error(
                    '%s:%d: %s'
                    % (lexicon_csv_path, lexicon_reader.line_num, error))
def get_csv(csv_path, dialect):
    """Returns the csv_data array for the provided csv_path."""
    try:
        # Register the dialect provided as a parameter.
        csv.register_dialect('flatfileed',
                             delimiter=dialect['CSV_DELIMITER'],
                             quoting=dialect['CSV_QUOTING'],
                             doublequote=dialect['CSV_DOUBLEQUOTE'],
                             strict=True)
        with open(csv_path) as csv_file:
            csv_reader = csv.reader(csv_file, 'flatfileed')
            csv_data = [row for row in csv_reader]
        if validate_csv(csv_data, dialect):
            return csv_data
        raise csv.Error('CSV data failed validation.')
    except FileNotFoundError:
        raise FileNotFoundError('CSV File not provided!')
    except csv.Error:
        raise csv.Error(
            'Wrong format of provided CSV! Check file or review configuration')
def writelines(self, rows: t.Union[list, tuple, t.Generator], **kwargs):
    asdict = kwargs.get('asdict')
    first_line = None
    if asdict and not self.has_headers:
        self.headers, first_line = self._detect_headers_before_write(rows)
        if not first_line:
            raise csv.Error(
                f'After header detection from rows= first_line has no data!\n'
                f'{first_line}')

    with self.writer(**kwargs) as writer:
        # Write the header row.
        if not self.has_headers and isinstance(writer, csv.DictWriter):
            raise csv.Error(
                'No headers set - write prohibited! Try to set asdict=True')
        if self.has_headers and isinstance(writer, csv.DictWriter):
            writer.writeheader()
        if first_line is not None and isinstance(rows, t.Generator):
            # During header detection the first line was already consumed
            # from the rows= generator, so write the saved first_line right
            # after the header.
            writer.writerow(first_line)
        for line in rows:
            writer.writerow(line)

    if isinstance(self.text, io.StringIO):
        return self.text.getvalue()
def list_to_csv(row_list: list, filename: str = ''):
    """Write a list of rows to a CSV file.

    Args:
        row_list (list): A list of lists (the rows).
        filename (str, optional): Name for the CSV file. Defaults to
            'report.csv'.

    Raises:
        OSError: If the output file cannot be created.
        csv.Error: If the CSV data cannot be written.
    """
    # Convert a single-column list to the required list-of-rows format.
    if not isinstance(row_list[0], (list, tuple)):
        row_list = [(line,) for line in row_list]
    # Write the CSV file.
    try:
        with open(filename if filename else 'report.csv', 'w', newline='',
                  encoding='utf-8') as file:
            writer = csv.writer(file, quoting=csv.QUOTE_ALL, delimiter=',')
            writer.writerows(row_list)
    except OSError as e:
        raise OSError(f'Error creating outputfile: {e}') from e
    except csv.Error as e:
        raise csv.Error(f'Error writing CSV data to file: {e}') from e
def read_base_comp(filename):
    """Read the base composition from a file created by write_base_comp."""
    with open(filename, newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Only the first data row is needed.
            return row
    raise csv.Error("No rows found in %r" % (filename,))
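# Illustrative usage sketch for read_base_comp above (the file contents are
# an assumption; write_base_comp is not shown in this source):
import csv
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False,
                                 newline='') as tmp:
    tmp.write('A,C,G,T\n0.25,0.25,0.25,0.25\n')
    path = tmp.name

composition = read_base_comp(path)
# composition == {'A': '0.25', 'C': '0.25', 'G': '0.25', 'T': '0.25'}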
def _get_updated_record(
    new_record,
    existing_record,
    continue_after_error,
    columns_with_arrays,
):
    record_to_write = copy.deepcopy(existing_record)

    # For any column not in the standard set, either update the existing
    # record with new data, or leave the column as the data provided.
    for column_name in [
            key for key in new_record.keys()
            if key not in (
                GUID_STANDARD_KEY,
                SIZE_STANDARD_KEY,
                MD5_STANDARD_KEY,
            )
    ]:
        # First handle space-delimited columns.
        if column_name in columns_with_arrays:
            if column_name in existing_record:
                # Column that has a space-delimited array of values.
                record_to_write[column_name] = " ".join(
                    sorted(set(new_record[column_name].split(" ")
                               + existing_record[column_name].split(" ")))
                ).strip(" ")
            else:
                record_to_write[column_name] = " ".join(
                    sorted(set(new_record[column_name].split(" ")))
                ).strip(" ")
        # Handle non-space-delimited columns.
        else:
            if not existing_record.get(column_name) or (
                    existing_record.get(column_name)
                    == new_record[column_name]):
                # Use the new record when nothing is in the existing record
                # or it is the same data.
                record_to_write[column_name] = new_record[column_name]
            elif not new_record[column_name]:
                # Persist existing data if there is no new data.
                record_to_write[column_name] = existing_record.get(
                    column_name, "")
            else:
                # Old and new have different values; unsure how to merge.
                error_msg = (
                    f"NOT merging column {column_name} for "
                    f"existing {existing_record} and new "
                    f"{new_record} because unsure how to merge the values.\n"
                    f"ERROR: IGNORING NEW VALUE if forced to continue "
                    f"without error.")
                logging.error(error_msg)
                if not continue_after_error:
                    raise csv.Error(error_msg)
                # If we get here, we ignore the new data and keep a row
                # with the existing data.
    return record_to_write
def submitcontext():
    try:
        username = request.form["username"]
        context = request.form["context"]
        headers = {"content-type": "application/json"}

        if not username:
            flash(["There was no username provided in the context request",
                   "Warning"])
            return redirect(url_for('homepage'))

        if username == "easterEggz":
            return redirect("https://thecodinglove.com/when-i-try-to-bypass-a-security-feature")

        contextFile = {}
        filename = "context.txt"

        # Open the config file and check for an existing context.
        with open(filename, "r") as csvfile:
            reader = csv.reader(csvfile, delimiter='=', escapechar='\\',
                                quoting=csv.QUOTE_NONE)
            for row in reader:
                if len(row) != 2:
                    raise csv.Error("Too many fields on row with contents: "
                                    + str(row))
                contextFile[row[0]] = row[1]

        updated = username in contextFile
        contextFile[username] = context

        csv.register_dialect('myDialect', delimiter='=', escapechar='\\',
                             quoting=csv.QUOTE_NONE)

        # Update / insert the context for the user.
        with open(filename, "w") as csvfile:
            fieldnames = ['property', 'value']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                    dialect="myDialect")
            for k, v in contextFile.items():
                writer.writerow({"property": k, 'value': v})

        # Flash a message to the user.
        if updated:
            flash([f"Successfully updated user {username}'s context to {context}",
                   "Success"])
        else:
            flash([f"Successfully set user {username}'s context to {context}",
                   "Success"])
        return redirect(url_for('homepage'))
    except Exception as e:
        flash(["Was not able to update context", "Error"])
        print(e)
        abort(500)
def parse_csv(self, s, length):
    try:
        l = list(csv.reader([s]))[0]
    except csv.Error:
        raise PassParserError('Error parsing CSV')
    if len(l) != length:
        raise PassParserError(
            f'Invalid CSV format, {length} columns expected instead of {len(l)}')
    return l
def load_csv(self, filename):
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        try:
            return [row for row in reader if row]  # Skip blank lines.
        except csv.Error as e:
            raise csv.Error('file {}, line {}: {}'.format(
                filename, reader.line_num, e)) from e
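# Illustrative usage sketch for load_csv above. The method never touches
# self, so for demonstration it is called directly with None; in real code
# it would live on whatever class defines it (an assumption here).
import csv
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.csv', delete=False,
                                 newline='') as tmp:
    tmp.write('a,b\n\n1,2\n')
    path = tmp.name

rows = load_csv(None, path)
# rows == [['a', 'b'], ['1', '2']] -- the blank line was skipped.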
def _process_csv(self):
    '''Collect information from the csv file.'''
    try:
        ifile = open(self.csv_file, "r")
        # The file must remain open while self._templates is consumed,
        # so it is intentionally not closed here.
        self._templates = csv.DictReader(ifile)
    except csv.Error as emsg:
        raise csv.Error("Issues with csv file %s. %s"
                        % (os.path.basename(self.csv_file), emsg))
def read_ac_file(self, ac_file):
    reader = csv.reader(ac_file, delimiter=' ', skipinitialspace=True)
    try:
        for row in reader:
            # See if this is a valid token and pass the file handle and
            # the current line to our handler function.
            if row[0] in self.tokens.keys():
                self.tokens[row[0]](ac_file, row)
            else:
                self.report_error("invalid token: {tok} ({ln})".format(
                    tok=row[0], ln=row))
    except csv.Error as e:
        self.report_error('AC3D import error, line %d: %s'
                          % (reader.line_num, e))
def _error_if_invalid_size_or_guid(record, existing_record,
                                   continue_after_error,
                                   allow_mult_guids_per_hash):
    """Log and raise errors based on cfg if hashes don't match or there are
    multiple GUIDs."""
    guid = existing_record.get(GUID_STANDARD_KEY)
    new_guid = record.get(GUID_STANDARD_KEY)

    if SIZE_STANDARD_KEY in existing_record:
        size = existing_record[SIZE_STANDARD_KEY]
        if size != record[SIZE_STANDARD_KEY]:
            error_msg = (
                "Found two objects with the same hash but different sizes,"
                f" could not merge. Details: object {existing_record} could"
                f" not be merged with object {record} because {size} !="
                f" {record[SIZE_STANDARD_KEY]}.")
            logging.error(error_msg)
            if not continue_after_error:
                raise csv.Error(error_msg)

    # At this point, the record has the same hash and size as a previous
    # guid, so either we're allowing an entry like that, or not.
    if GUID_STANDARD_KEY in existing_record:
        if guid and new_guid and guid != new_guid:
            warning_msg = (
                "Found two objects with the same hash but different guids,"
                f" could not merge. Details: object {existing_record} could"
                f" not be merged with object {record} because {guid} !="
                f" {new_guid}.")
            if not allow_mult_guids_per_hash:
                logging.error(warning_msg)
                raise csv.Error(warning_msg)
            info_msg = (
                f"Allowing multiple GUIDs per hash. {new_guid} has same "
                f"hash as {guid}.\n Details: {record} is a different "
                f"record with same hash as existing guid: {guid}.")
            logging.info(info_msg)
def _sniff(self, size=10000, delimiters=', \t|'):
    '''Detect a header and the dialect within the first size bytes.'''
    self.file.seek(0)
    sample = self.file.read(size)
    try:
        sample = sample.decode(self.encoding)
    except UnicodeDecodeError:
        raise csv.Error('Encountered invalid Unicode character')
    except AttributeError:
        # The file is in text mode, so the sample is already decoded.
        pass
    self.file.seek(0)
    sniffer = csv.Sniffer()
    return sniffer.sniff(sample, delimiters), sniffer.has_header(sample)
def _is_fauxer_file(self):
    # Does not check the MIME type; checks for readability and explicit
    # dialect attributes. See the dialect these values should match.
    try:
        with open(self.fn, 'r') as file:
            dialect = csv.Sniffer().sniff(file.read(1024))
            if dialect.delimiter != ',':
                raise csv.Error('Cell delimiter is not "," .')
            file.seek(0)
    except csv.Error as e:
        raise FileExistsError(
            '{} file is not a valid Fauxlizer (.faux) file. {}'.format(
                self.fn, str(e)))
async def test_raises_permission_denied_error(self):
    file_path = self.create_tempfile()
    os.remove(file_path)
    release_mngr = file_release_manager.CSVFileReleaseManager(
        file_path=file_path,
        save_mode=file_release_manager.CSVSaveMode.APPEND)

    with mock.patch.object(csv.DictWriter, 'writerow') as mock_writerow:
        mock_writerow.side_effect = csv.Error()
        with self.assertRaises(
                file_release_manager.FileReleaseManagerPermissionDeniedError):
            await release_mngr._append_value({})
def process_row(row, cur):
    """
    process_row handles the tuples produced by the csv reader, and handles
    all errors from the actual database functions. Error handling prints a
    message and returns to the for loop, so that we can recover from broken
    records. The goal is to insert as many good vulns and/or records as
    possible, and alert the user to the malformed records. db.commit()
    happens after every successful insertVuln()/insertHost(), and every
    successful insertInstance().

    Row header: Plugin ID,CVE,CVSS,Risk,Host,Protocol,Port,Name,Synopsis,Description,Solution,See Also,Plugin Output
    Sample row: 10881,,,None,192.168.1.5,tcp,22,SSH Protocol Versions Supported,A SSH server is running on the remote host.,"This plugin determines the versions of the SSH protocol supported by the remote SSH daemon.",n/a,,"The remote SSH daemon supports the following versions of the SSH protocol :-X.XX- X.X SSHv2 host key fingerprint : XX:02:XX:07:54:05:b0:XX:4b:dd:88:XX:43:ae:XX:0a"
    """
    if row[3] == "None" or row[3] == "Risk":
        # Not a vuln.
        return
    (pluginID, cve, cvss, risk, hostName, protocol, port, vulnName,
     vulnDescription, longDescription, solution, url, pluginOutput) = row
    textString = ('<div id="protocol">Protocol: ' + protocol + '</div>'
                  '<div id="port">Port: ' + port + '</div>'
                  '<div id="detailed-explanation">More Details: '
                  + longDescription + '</div>'
                  '<div id="plugin-output">Plugin Output: '
                  + pluginOutput + '</div>')
    descString = ('<div id="description">DESCRIPTION: '
                  + vulnDescription + '</div>'
                  '<div id="cvss-score">CVSS: ' + cvss + '</div>'
                  '<div id="solution">Solution: ' + solution + '</div>')
    if hostName == '' or vulnName == '' or vulnDescription == '':
        exitCode = 2
        raise csv.Error('Incomplete Record.')
    try:
        cur.execute("SELECT host_id FROM host WHERE host_name = %s",
                    (hostName,))
        hostID = cur.fetchone()
        if hostID is None:
            hostID = insertHost(hostName)
        else:
            hostID = hostID[0]
    except MySQLdb.Error as e:
        try:
            print("Host '%s' Lookup Error [%d]: %s"
                  % (hostName, e.args[0], e.args[1]))
            exitCode = 2
            return
        except IndexError:
            print("Host '%s' Lookup Error: %s" % (hostName, str(e)))
            exitCode = 2
            return
def read_properties(filename):
    """
    Reads a given properties file with each line of the format key=value.
    Returns a dictionary containing the pairs.

    Keyword arguments:
        filename -- the name of the file to be read
    """
    result = {}
    with open(filename, "r", newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter='=', escapechar='\\',
                            quoting=csv.QUOTE_NONE)
        for row in reader:
            if len(row) != 2:
                raise csv.Error("Too many fields on row with contents: "
                                + str(row))
            result[row[0]] = row[1]
    return result
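# Illustrative usage sketch for read_properties above (the file contents
# are an assumption, not from the original source):
import csv
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.properties',
                                 delete=False) as tmp:
    # The backslash escapes a literal '=' inside a value.
    tmp.write('host=example.com\nport=8080\npath=C\\=drive\n')
    path = tmp.name

props = read_properties(path)
# props == {'host': 'example.com', 'port': '8080', 'path': 'C=drive'}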
def loadConfig():
    try:
        with open(configFilename, "r", newline="") as csvfile:
            reader = csv.reader(csvfile, delimiter='=', escapechar='\\',
                                quoting=csv.QUOTE_NONE)
            for row in reader:
                if len(row) != 2:
                    raise csv.Error("Too many fields on row with contents: "
                                    + str(row))
                configOptions[row[0]] = row[1]
    except Exception:
        # A missing or malformed config file is silently ignored.
        pass
def post(self, request, *args, **kwargs):
    #pylint:disable=unused-argument,too-many-locals
    plan = kwargs.get('plan')
    response = {'created': [], 'updated': [], 'failed': []}
    uploaded = request.FILES.get('file')
    filed = csv.reader(
        StringIO(uploaded.read().decode('utf-8', 'ignore'))
        if uploaded else StringIO())
    for row in filed:
        try:
            if len(row) == 2:
                full_name, email = row
            elif len(row) == 3:
                first_name, last_name, email = row
                full_name = '%s %s' % (first_name, last_name)
            else:
                raise csv.Error()
        except csv.Error:
            response['failed'].append({
                'data': {'raw': row},
                'error': 'Unable to parse row'
            })
        else:
            serializer = CartItemCreateSerializer(data={
                'plan': plan,
                'full_name': full_name,
                'sync_on': email,
                'email': email
            })
            if serializer.is_valid():
                cart_item, created = self.insert_item(
                    request, **serializer.data)
                if isinstance(cart_item, CartItem):
                    cart_item = serializer.to_representation(cart_item)
                if created:
                    response['created'].append(cart_item)
                else:
                    response['updated'].append(cart_item)
            else:
                response['failed'].append({
                    'data': serializer.data,
                    'error': serializer.errors
                })
    return Response(response)
def write_entities(entities, header, outputfile):
    """
    Takes a set of entities and saves them to a file.

    :param set entities: set of tuples with entity data: identifier, label,
        name and other attributes.
    :param list header: list of column names.
    :param str outputfile: path to file to be saved (including filename and
        extension).
    """
    try:
        df = pd.DataFrame(list(entities), columns=header)
        df.to_csv(path_or_buf=outputfile, sep='\t', header=True, index=False,
                  quotechar='"', line_terminator='\n', escapechar='\\')
    except csv.Error as err:
        raise csv.Error("Error writing entities to file: {}.\n {}".format(
            outputfile, err))
def write_relationships(relationships, header, outputfile):
    """
    Takes a set of relationships and saves them to a file.

    :param set relationships: set of tuples with relationship data: source
        node, target node, relationship type, source and other attributes.
    :param list header: list of column names.
    :param str outputfile: path to file to be saved (including filename and
        extension).
    """
    try:
        df = pd.DataFrame(list(relationships), columns=header)
        df.to_csv(path_or_buf=outputfile, sep='\t', header=True, index=False,
                  quotechar='"', line_terminator='\n', escapechar='\\')
    except Exception as err:
        raise csv.Error("Error writing relationships to file: {}.\n {}".format(
            outputfile, err))
def sniff_dialect(sample, sep, skip_dialect, ui):
    t1 = time()
    try:
        if skip_dialect:
            ui.debug('investigate_encoding_and_dialect - skip dialect detect')
            if sep:
                csv.register_dialect('dataset_dialect', csv.excel,
                                     delimiter=sep)
            else:
                csv.register_dialect('dataset_dialect', csv.excel)
            dialect = csv.get_dialect('dataset_dialect')
        else:
            sniffer = csv.Sniffer()
            dialect = sniffer.sniff(sample, delimiters=sep)
            ui.debug('investigate_encoding_and_dialect - seconds to detect '
                     'csv dialect: {}'.format(time() - t1))
    except csv.Error:
        decoded_one = sample
        t2 = time()
        detector = Detector()
        delimiter, resampled = detector.detect(decoded_one)
        if len(delimiter) == 1:
            delimiter = delimiter[0]
            ui.info("Detected delimiter as %s" % delimiter)
            if sep is not None and sep != delimiter:
                delimiter = sep
        else:
            raise csv.Error(
                "The csv module failed to detect the CSV dialect. "
                "Try giving hints with the --delimiter argument, "
                "E.g --delimiter=','")
        sniffer = csv.Sniffer()
        dialect = sniffer.sniff(resampled, delimiters=delimiter)
        ui.debug('investigate_encoding_and_dialect v2 - seconds to detect '
                 'csv dialect: {}'.format(time() - t2))
    if dialect.escapechar is None:
        csv.register_dialect('dataset_dialect', dialect,
                             delimiter=str(dialect.delimiter),
                             quotechar=str(dialect.quotechar),
                             doublequote=True)
        dialect = csv.get_dialect('dataset_dialect')
    return dialect
def readPropertyFile(self):
    import csv
    file_contents = self.readAll()
    file_lines = file_contents.splitlines()
    result = {}
    reader = csv.reader(file_lines, delimiter='=', quoting=csv.QUOTE_NONE)
    for row in reader:
        if len(row) != 2:
            raise csv.Error("Too many fields on row with contents: "
                            + str(row))
        result[row[0].strip()] = row[1].strip().lstrip('"').rstrip('"')
    return result
def parse_row(self, row, lineno):
    """Parse a row of a TD Canada Trust CSV file.

    Args:
        row: A list of field values for the row.
        lineno: The line number where the row appears in the CSV file.

    Returns:
        A beansoup.importers.csv.Row object.
    """
    if len(row) != 5:
        raise csvlib.Error(
            'Invalid row; expecting 5 values: {}'.format(row))
    date = datetime.datetime.strptime(row[0], '%m/%d/%Y').date()
    description = row[1]
    amount = -D(row[2]) if row[2] else D(row[3])
    balance = self.account_sign * D(row[4])
    return csv.Row(lineno, date, description, amount, balance)
def check_and_clean_up_row(cls, extended_rows):
    """Check that each row has the same length as the header and standardize
    null values as empty strings."""
    for row_number, headers, row in extended_rows:
        if len(row) != len(headers):
            raise csv.Error(
                f'row {row_number} has a different number of data '
                f'points ({len(row)}) than there are column headers '
                f'({len(headers)})')
        cleaned_row = []
        for value in row:
            # Replace every known null marker with the empty string.
            for null_value in CSV_NULL_VALUES:
                value = value.replace(null_value, '')
            cleaned_row.append(value)
        yield (row_number, headers, cleaned_row)
def get_rows_from_file(file):
    data = file.read()
    try:
        import chardet
        charset = chardet.detect(data)['encoding']
    except ImportError:
        charset = file.charset
    data = data.decode(charset or 'utf-8')
    # If the file was modified on a Mac, it only contains \r as line breaks.
    if '\r' in data and '\n' not in data:
        data = data.replace('\r', '\n')

    # Sniffing line by line is necessary as some banks like to include
    # one-column garbage at the beginning of the file which breaks the
    # sniffer. See also: http://bugs.python.org/issue2078
    last_e = None
    dialect = None
    for line in data.split("\n"):
        line = line.strip()
        if len(line) == 0:
            continue
        try:
            dialect = csv.Sniffer().sniff(line, delimiters=";,.#:")
        except Exception as e:
            last_e = e
        else:
            last_e = None
            break
    if dialect is None:
        raise last_e or csv.Error("No dialect detected")

    reader = csv.reader(io.StringIO(data), dialect)
    rows = []
    for row in reader:
        if rows and len(row) > len(rows[0]):
            # Some banks put metadata above the real data, things like
            # a headline, the bank's name, the user's name, etc.
            # In many cases, we can identify this because these rows
            # have fewer columns than the rows containing the real data.
            # Therefore, if the number of columns suddenly grows, we start
            # over with parsing.
            rows = []
        rows.append(row)
    return rows
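# Illustrative usage sketch for get_rows_from_file above. FakeUpload is an
# assumed stand-in for an uploaded-file object offering read() and a
# charset attribute, as the function expects; the payload is made up.
import csv
import io

class FakeUpload:
    charset = 'utf-8'

    def __init__(self, payload: bytes):
        self._buf = io.BytesIO(payload)

    def read(self):
        return self._buf.read()

rows = get_rows_from_file(
    FakeUpload(b'Bank Export\ndate;amount\n2021-01-02;42,00\n'))
# The one-column 'Bank Export' banner is discarded when the column count
# grows, leaving [['date', 'amount'], ['2021-01-02', '42,00']].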