import csv

with open('data1.csv', newline='') as source:
    dialect = csv.Sniffer().sniff(source.readline())
    source.seek(0)
    reader = csv.reader(source, dialect)
    number_of_columns = len(next(reader))
    source.seek(0)
    with open("data/brazil.csv", "w") as goalkeepers_file:
        gk_writer = csv.writer(goalkeepers_file)
        with open("data/players.csv", "w") as players_file:
            pl_writer = csv.writer(players_file)
            gk_index = 0
            pl_index = 0

            position_column = -1
            for r in reader:
                for i in range(number_of_columns):
                    if r[i] == "Nationality":
                        position_column = i

            source.seek(0)
            header = next(reader)
            gk_writer.writerow(header)
            pl_writer.writerow(header)
            for r in reader:
                if r[position_column] == "Brazil":
                    r[0] = gk_index
                    gk_writer.writerow(r)
                    gk_index += 1
                else:
                    r[0] = pl_index
                    pl_writer.writerow(r)
                    pl_index += 1
Example #2
    def clean_file(self):
        """Parses the CSV file."""
        file = self.cleaned_data.get("file")

        if file._size > self.max_upload_size:
            raise forms.ValidationError(
                _(u"Uploaded file is too large ( > 1MB )"))

        if file:

            try:
                dialect = csv.Sniffer().sniff(file.read(1024))
                file.seek(0)
                reader = csv.reader(file, dialect)

                try:
                    header_row = reader.next()
                except StopIteration:
                    raise forms.ValidationError("That CSV file is empty.")
                headers = [
                    RE_WHITESPACE.sub(
                        "_",
                        cell.decode("utf-8", "ignore").lower().strip())
                    for cell in header_row
                ]
                # Check the required fields.
                if len(headers) == 0:
                    raise forms.ValidationError(
                        "That CSV file did not contain a valid header line.")

                if "project-task" not in headers:
                    raise forms.ValidationError(
                        "Could not find a column labelled 'project-task' in that CSV file."
                    )

                if "file-name" not in headers:
                    raise forms.ValidationError(
                        "Could not find a column labelled 'file-name' in that CSV file."
                    )

                if "folder-name" not in headers:
                    raise forms.ValidationError(
                        "Could not find a column labelled 'folder-name' in that CSV file."
                    )

                # Go through the rest of the CSV file.
                clean_rows = []
                invalid_rows = []
                invalid_cells = []
                for y_index, row in enumerate(reader, 2):
                    row = [
                        cell.decode("utf-8", "ignore").strip() for cell in row
                    ]
                    try:
                        row_data = dict(zip(headers, row))
                    except IndexError:
                        invalid_rows.append((y_index, row_data))

                    # ignore blank rows
                    if not ''.join(str(x) for x in row):
                        continue

                    for x_index, cell_value in enumerate(row):
                        try:
                            headers[x_index]
                        except IndexError:
                            continue

                        if headers[x_index]:
                            if not cell_value:
                                invalid_rows.append(
                                    (headers[x_index], y_index))
                                raise ValidationError(
                                    u'Missing required value %s for row %s' %
                                    (headers[x_index], y_index + 1))
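                    # Note: the original snippet never appends to clean_rows, so the
                    # check below would always treat the file as having no valid rows;
                    # presumably the validated row is meant to be collected here
                    # (assumption).
                    clean_rows.append(row_data)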

            except csv.Error:
                raise forms.ValidationError("Please upload a valid CSV file.")
            # Check that some rows were parsed.

            if not clean_rows and invalid_rows:
                raise forms.ValidationError(
                    " Workitems could not be imported, due to errors in that CSV file."
                )
            # Store the parsed data.
            self.cleaned_data["rows"] = clean_rows
            self.cleaned_data["invalid_rows"] = invalid_rows
        return file
Example #3

@requires_segment_info
def winnr(pl, segment_info, show_current=True):
    '''Show window number

	:param bool show_current:
		If False do not show current window number.
	'''
    winnr = segment_info['winnr']
    if show_current or winnr != vim.current.window.number:
        return str(winnr)


csv_cache = None
sniffer = csv.Sniffer()


def detect_text_csv_dialect(text, display_name, header_text=None):
    return (
        sniffer.sniff(string(text)),
        sniffer.has_header(string(header_text or text))
        if display_name == 'auto' else display_name,
    )


CSV_SNIFF_LINES = 100
CSV_PARSE_LINES = 10

if sys.version_info < (2, 7):
Example #4
    def process_csv_lines(csv_lines):
        dialect = None
        try:
            dialect = csv.Sniffer().sniff("".join(csv_lines[:3]),
                                          options.delimiter)
        except csv.Error:  # can't guess specific dialect, try without one
            pass

        bank_reader = csv.reader(csv_lines, dialect)

        for i, row in enumerate(bank_reader):
            # Skip any empty lines in the input
            if len(row) == 0:
                continue

            entry = Entry(row, csv_lines[i], options)

            # detect duplicate entries in the ledger file and optionally skip or prompt user for action
            #if options.skip_dupes and csv_lines[i].strip() in csv_comments:
            if (options.skip_older_than < 0) or (entry.days_old <=
                                                 options.skip_older_than):
                if options.clear_screen:
                    print('\033[2J\033[;H')
                print('\n' + entry.prompt())
                if (options.skip_dupes or options.confirm_dupes
                    ) and entry.md5sum in md5sum_hashes:
                    value = 'Y'
                    # if interactive flag was passed prompt user before skipping transaction
                    if options.confirm_dupes:
                        yn_response = prompt_for_value(
                            'Duplicate transaction detected, skip?',
                            possible_yesno, 'Y')
                        if yn_response:
                            value = yn_response
                    if value.upper().strip() not in ('N', 'NO'):
                        continue
                while True:
                    payee, account, tags = get_payee_and_account(entry)
                    value = 'C'
                    if options.entry_review:
                        # need to display ledger formatted entry here
                        #
                        # request confirmation before committing transaction
                        print('\n' + 'Ledger Entry:')
                        print(entry.journal_entry(i + 1, payee, account, tags))
                        yn_response = prompt_for_value(
                            'Commit transaction (Commit, Modify, Skip)?',
                            ('C', 'M', 'S'), value)
                        if yn_response:
                            value = yn_response
                    if value.upper().strip() not in ('C', 'COMMIT'):
                        if value.upper().strip() in ('S', 'SKIP'):
                            break
                        else:
                            continue
                    else:
                        # add md5sum of new entry, this helps detect duplicate entries in same file
                        md5sum_hashes.add(entry.md5sum)
                        break
                if value.upper().strip() in ('S', 'SKIP'):
                    continue

                yield entry.journal_entry(i + 1, payee, account, tags)
Example #5
def process_csv_file(absolute_base_file, table_name_temp, new_table,
                     geom_table_name, geom_table_id, geom_table_columns,
                     geom_table_geom):
    # Create table based on CSV
    import csv
    f = open(absolute_base_file, 'rb')
    no_header_row = False

    with open(absolute_base_file, 'rb') as csvfile:
        # get the type of delimiter
        dialect = csv.Sniffer().sniff(csvfile.read())

    try:
        csv_table = table.Table.from_csv(f,
                                         name=table_name_temp,
                                         no_header_row=no_header_row,
                                         delimiter=dialect.delimiter)
    except:
        status_code = '400'
        errormsgs_val = "Failed to create the table from CSV."
        return errormsgs_val, status_code

    for idx, column in enumerate(csv_table):
        column.name = slugify(unicode(column.name)).replace('-', '_')
        # Check if the selected value from the dropdown menu matches the first value of the CSV header
        if idx == 0:
            print("column.name.strip()", column.name.strip())
            print("geom_table_id.strip()", geom_table_id.strip())
            if column.name.strip() != geom_table_id.strip():
                errormsgs_val = "The selected value of Layer Type doesn't match the one of the imported layer."
                status_code = '400'
                return errormsgs_val, status_code
    # Check if there are added columns in the CSV
    if idx < 2:
        errormsgs_val = "The CSV has no added columns. Please add extra columns."
        status_code = '400'
        return errormsgs_val, status_code
    else:
        try:
            sql_table = sql.make_table(csv_table, table_name_temp)
            create_table_sql = sql.make_create_table_statement(
                sql_table, dialect="postgresql")
            create_table_sql = re.sub(r'VARCHAR\([0-9]*\)', 'VARCHAR(254)',
                                      create_table_sql)
        except:
            return None, str(sys.exc_info()[0])

        constr = "dbname='{dbname}' user='******' host='{host}' password='******'".format(
            **{
                'dbname': settings.DATABASES['uploaded']['NAME'],
                'user': settings.DATABASES['uploaded']['USER'],
                'host': settings.DATABASES['uploaded']['HOST'],
                'password': settings.DATABASES['uploaded']['PASSWORD']
            })
        conn = psycopg2.connect(constr)

        try:
            # Check if there is already a table with the same name
            cur = conn.cursor()

            sqlstr = "SELECT EXISTS(SELECT * FROM information_schema.tables WHERE table_name='{new_table_name}');".format(
                **{'new_table_name': new_table})
            cur.execute(sqlstr)
            exists = cur.fetchone()[0]
            if exists:
                errormsgs_val = "There is already a layer with this name. Please choose another title."
                status_code = '400'
                return errormsgs_val, status_code

            #  If temporary table exists then drop it - then create it and add primary key
            cur.execute('DROP TABLE IF EXISTS %s CASCADE;' % table_name_temp)
            cur.execute(create_table_sql)
            conn.commit()
            sqlstr = "ALTER TABLE IF EXISTS {temp_table} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
                **{'temp_table': table_name_temp})
            cur.execute(sqlstr)
            conn.commit()
        except Exception as e:
            logger.error("Error Creating Temporary table %s:%s",
                         table_name_temp, str(e))

        #  Copy data to table
        connection_string = "postgresql://%s:%s@%s:%s/%s" % (
            settings.DATABASES['uploaded']['USER'],
            settings.DATABASES['uploaded']['PASSWORD'],
            settings.DATABASES['uploaded']['HOST'],
            settings.DATABASES['uploaded']['PORT'],
            settings.DATABASES['uploaded']['NAME'])
        try:
            engine, metadata = sql.get_connection(connection_string)
        except ImportError:
            return None, str(sys.exc_info()[0])

        conn_eng = engine.connect()
        trans = conn_eng.begin()

        if csv_table.count_rows() > 0:
            insert = sql_table.insert()
            headers = csv_table.headers()
            try:
                conn_eng.execute(
                    insert,
                    [dict(zip(headers, row)) for row in csv_table.to_rows()])
            except:
                return None, str(sys.exc_info()[0])

        trans.commit()
        conn_eng.close()

        # Create joined table - drop table_name_temp
        new_clmns = []
        for idx, item in enumerate(headers):
            # The first two columns already come from the global geometry table,
            # so only the newly added columns are joined in again.
            if idx > 1:
                new_column = "{table_name}.{item}".format(**{
                    'table_name': table_name_temp,
                    'item': item
                })
                new_clmns.append(new_column)

        added_columns = ', '.join(new_clmns)
        try:

            # Joined table
            sqlstr = "CREATE TABLE {new_table_name} AS (SELECT {geom_table_columns}, {added_columns} FROM {geom_table} INNER JOIN {temp_table} ON (g.{id} = {temp_table}.{id}));".format(
                **{
                    'new_table_name': new_table,
                    'geom_table': geom_table_name,
                    'geom_table_columns': geom_table_columns,
                    'temp_table': table_name_temp,
                    'id': geom_table_id,
                    'added_columns': added_columns
                })
            cur.execute(sqlstr)
            conn.commit()
            sqlstr = "ALTER TABLE IF EXISTS {new_table_name} ADD COLUMN fid SERIAL PRIMARY KEY;".format(
                **{'new_table_name': new_table})
            cur.execute(sqlstr)
            conn.commit()

            sqlstr = "CREATE INDEX indx_{new_table_name} ON {new_table_name} USING btree({id});".format(
                **{
                    'new_table_name': new_table,
                    'id': geom_table_id,
                })
            cur.execute(sqlstr)
            conn.commit()
            sqlstr = "CREATE INDEX indx_geom_{new_table_name} ON {new_table_name} USING GIST({geom});".format(
                **{
                    'new_table_name': new_table,
                    'geom': geom_table_geom,
                })
            cur.execute(sqlstr)
            conn.commit()

        except:
            print("Failed to create joined table.")
            logger.error("Failed to create joined table.")

        try:
            sqlstr = "DROP TABLE IF EXISTS {temp_table} CASCADE;".format(
                **{'temp_table': table_name_temp})
            cur.execute(sqlstr)
            conn.commit()
        except:
            logger.error("Failed to drop temporary table.")
        conn.close()

        status_code = 200
        errormsgs_val = ''
        return errormsgs_val, status_code
Example #6
def make_rwc_popular_index(data_path):
    annotations_dir = os.path.join(data_path, 'RWC-Popular', 'annotations')
    metadata_dir = os.path.join(data_path, 'RWC-Popular', 'metadata-master')
    audio_dir = os.path.join(data_path, 'RWC-Popular', 'audio')
    annotations_files = os.listdir(
        os.path.join(annotations_dir, 'AIST.RWC-MDB-P-2001.CHORUS'))
    metadata_file = os.path.join(metadata_dir, 'rwc-p.csv')
    with open(metadata_file, 'r', encoding='utf-8') as fhandle:
        dialect = csv.Sniffer().sniff(fhandle.read(1024))
        fhandle.seek(0)
        reader = csv.reader(fhandle, dialect)
        piece = []
        suffix = []
        track = []
        for line in reader:
            if not line[0] == "Piece No.":
                p = '00' + line[0].split('.')[1][1:]
                piece.append(p[len(p) - 3:])
                suffix.append(line[1][1:])
                track.append(line[2][-2:])

    mapping_track = {p: t for p, t in zip(piece, track)}
    mapping_folder = {p: s for p, s in zip(piece, suffix)}

    track_ids = sorted([
        os.path.basename(f).split('.')[0] for f in annotations_files
        if not f == 'README.TXT'
    ])

    rwc_popular_index = {}
    for track_id in track_ids:
        # audio
        audio_folder = 'rwc-p-m{}'.format(mapping_folder[track_id[4:]])
        audio_path = os.path.join(audio_dir, audio_folder)
        audio_track = str(int(mapping_track[track_id[4:]]))
        audio_checksum = md5(
            os.path.join(audio_path, "{}.wav".format(audio_track)))
        annot_checksum = []
        annot_rels = []

        for f in ['CHORUS', 'BEAT', 'CHORD', 'VOCA_INST']:
            if f == 'CHORD':
                if os.path.exists(
                        os.path.join(
                            annotations_dir,
                            'AIST.RWC-MDB-P-2001.{}'.format(f),
                            'RWC_Pop_Chords',
                            'N{}-M{}-T{}.lab'.format(
                                track_id[-3:],
                                mapping_folder[track_id[-3:]],
                                mapping_track[track_id[-3:]],
                            ),
                        )):
                    annot_checksum.append(
                        md5(
                            os.path.join(
                                annotations_dir,
                                'AIST.RWC-MDB-P-2001.{}'.format(f),
                                'RWC_Pop_Chords',
                                'N{}-M{}-T{}.lab'.format(
                                    track_id[-3:],
                                    mapping_folder[track_id[-3:]],
                                    mapping_track[track_id[-3:]],
                                ),
                            )))
                    annot_rels.append(
                        os.path.join(
                            'annotations',
                            'AIST.RWC-MDB-P-2001.{}'.format(f),
                            'RWC_Pop_Chords',
                            'N{}-M{}-T{}.lab'.format(
                                track_id[-3:],
                                mapping_folder[track_id[-3:]],
                                mapping_track[track_id[-3:]],
                            ),
                        ))
                else:
                    annot_checksum.append(None)
                    annot_rels.append(None)
            else:
                if os.path.exists(
                        os.path.join(
                            annotations_dir,
                            'AIST.RWC-MDB-P-2001.{}'.format(f),
                            '{}.{}.TXT'.format(track_id, f),
                        )):
                    annot_checksum.append(
                        md5(
                            os.path.join(
                                annotations_dir,
                                'AIST.RWC-MDB-P-2001.{}'.format(f),
                                '{}.{}.TXT'.format(track_id, f),
                            )))
                    annot_rels.append(
                        os.path.join(
                            'annotations',
                            'AIST.RWC-MDB-P-2001.{}'.format(f),
                            '{}.{}.TXT'.format(track_id, f),
                        ))
                else:
                    annot_checksum.append(None)
                    annot_rels.append(None)

        rwc_popular_index[track_id] = {
            'audio': (
                os.path.join('audio', audio_folder,
                             "{}.wav".format(audio_track)),
                audio_checksum,
            ),
            'sections': (annot_rels[0], annot_checksum[0]),
            'beats': (annot_rels[1], annot_checksum[1]),
            'chords': (annot_rels[2], annot_checksum[2]),
            'voca_inst': (annot_rels[3], annot_checksum[3]),
        }

    with open(RWC_POPULAR_INDEX_PATH, 'w') as fhandle:
        json.dump(rwc_popular_index, fhandle, indent=2)
Example #7
def read_csv_cirrus(filename):
    """Read a Cirrus CSV file. Support currently exists for some types of
    CSV files extracted with NoiseTools. There is no support for CSVs related
    to occupational noise.

    If there are NC and NR values in the CSV file, they will be stored in the
    returned object under the ``nc`` and ``nr`` attributes. If the CSV file
    contains a time history, the date and time are available through the
    ``time`` attribute, and the integration time through the
    ``integration_time`` attribute.

    :param filename: CSV file name.
    :returns: Pandas dataframe with all data extracted from the CSV file.
    :rtype: Pandas dataframe.

    """
    with open(filename, "r") as csvfile:
        csvreader = csvfile.read()
        csvreader = re.sub(r" dB", "", csvreader)  # Clean " dB" from data
        dialect = csv.Sniffer().sniff(csvreader, delimiters=",;")
        separator = dialect.delimiter
        # Guess decimal separator
        decimal_sep = re.search(r"\"\d{2,3}"
                                r"(\.|,)"  # Decimal separator
                                r"\d{1,2}\"",
                                csvreader).group(1)
    n_cols = re.search("(.+)\n", csvreader).group(1).count(separator) + 1
    if n_cols < 5:
        unsorted_data = []
        pdindex = ["Z"]
        for i, c in enumerate(csvreader.splitlines()):
            if c[:4] == '"NR"':
                nr = int(re.search(r"\d{2}", c).group(0))
                continue
            elif c[:4] == '"NC"':
                nc = int(re.search(r"\d{2}", c).group(0))
                continue
            if i != 0:
                unsorted_data.append(c.split(separator))
            else:
                if n_cols == 3:
                    pdindex.append(c[-2:-1])
                elif n_cols == 4:
                    pdindex.append("A")
                    pdindex.append("C")

        # Create a sorted temporary csv-like file
        csv_data = list(zip(*unsorted_data))
        temp_csv = ""
        for row in csv_data:
            temp_csv += separator.join(row) + "\n"
        # Then, read it with pandas
        data = pd.read_csv(io.StringIO(temp_csv), sep=separator,
                           decimal=decimal_sep)

        # Assign NC and NR data if they are present
        try:
            data.nc = nc
            data.nr = nr
        except:
            pass

        # If the csv file contains global data from the "Details" tab in
        # NoiseTools, skip row names
        if n_cols != 2:
            data.index = pdindex

    else:
        data = pd.read_csv(filename, parse_dates=[[0, 1]], sep=separator,
                           decimal=decimal_sep)

        # Fix time name column
        en_columns = data.columns.values
        en_columns[0] = "time"
        data.columns = en_columns

        # Guess integration time with statistical mode because the csv could
        # have been cleaned from unwanted noise
        data["time"] = pd.to_datetime(data.time)
        delta = data.time.diff().fillna(0)
        int_time = int(delta.mode()) * 1e-9  # Mode and change from ns to s
        if round(int_time, 2) == 0.06:  # Fix for 1/16 s
            int_time = 0.0625
        data.integration_time = int_time

    return data
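
A minimal usage sketch for the function above; the file name is illustrative, and the optional attributes exist only when the CSV actually contains that information:

if __name__ == "__main__":
    # Read a NoiseTools export and inspect what was recovered.
    data = read_csv_cirrus("noise_measurement.csv")
    print(data.head())
    # ``nc``, ``nr`` and ``integration_time`` are set only for CSVs that
    # include NC/NR curves or a time history, so use getattr defensively.
    print(getattr(data, "integration_time", None))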
Example #8
    def test_doublequote(self):
        sniffer = csv.Sniffer()
        dialect = sniffer.sniff(self.header)
        self.assertFalse(dialect.doublequote)
        dialect = sniffer.sniff(self.sample2)
        self.assertTrue(dialect.doublequote)
Example #9
    def run(self, args):
        """Reads in a CSV, performs augmentation, and outputs an augmented CSV.

        Preserves all columns except for the input (augmented) column.
        """
        if args.interactive:

            print("\nRunning in interactive mode...\n")
            augmenter = eval(AUGMENTATION_RECIPE_NAMES[args.recipe])(
                pct_words_to_swap=args.pct_words_to_swap,
                transformations_per_example=args.transformations_per_example,
            )
            print("--------------------------------------------------------")

            while True:
                print(
                    '\nEnter a sentence to augment, "q" to quit, "c" to view/change arguments:\n'
                )
                text = input()

                if text == "q":
                    break

                elif text == "c":
                    print(
                        f"\nCurrent Arguments:\n\n\t augmentation recipe: {args.recipe}, "
                        f"\n\t pct_words_to_swap: {args.pct_words_to_swap}, "
                        f"\n\t transformations_per_example: {args.transformations_per_example}\n"
                    )

                    change = input(
                        "Enter 'c' again to change arguments, any other keys to opt out\n"
                    )
                    if change == "c":
                        print("\nChanging augmenter arguments...\n")
                        recipe = input(
                            "\tAugmentation recipe name ('r' to see available recipes):  "
                        )
                        if recipe == "r":
                            print(
                                "\n\twordnet, embedding, charswap, eda, checklist\n"
                            )
                            args.recipe = input(
                                "\tAugmentation recipe name:  ")
                        else:
                            args.recipe = recipe

                        args.pct_words_to_swap = float(
                            input(
                                "\tPercentage of words to swap (0.0 ~ 1.0):  ")
                        )
                        args.transformations_per_example = int(
                            input("\tTransformations per input example:  "))

                        print("\nGenerating new augmenter...\n")
                        augmenter = eval(
                            AUGMENTATION_RECIPE_NAMES[args.recipe])(
                                pct_words_to_swap=args.pct_words_to_swap,
                                transformations_per_example=args.
                                transformations_per_example,
                            )
                        print(
                            "--------------------------------------------------------"
                        )

                    continue

                elif not text:
                    continue

                print("\nAugmenting...\n")
                print(
                    "--------------------------------------------------------")

                for augmentation in augmenter.augment(text):
                    print(augmentation, "\n")
                print(
                    "--------------------------------------------------------")
        else:
            textattack.shared.utils.set_seed(args.random_seed)
            start_time = time.time()
            if not (args.csv and args.input_column):
                raise ArgumentError(
                    "The following arguments are required: --csv, --input-column/--i"
                )
            # Validate input/output paths.
            if not os.path.exists(args.csv):
                raise FileNotFoundError(
                    f"Can't find CSV at location {args.csv}")
            if os.path.exists(args.outfile):
                if args.overwrite:
                    textattack.shared.logger.info(
                        f"Preparing to overwrite {args.outfile}.")
                else:
                    raise OSError(
                        f"Outfile {args.outfile} exists and --overwrite not set."
                    )
            # Read in CSV file as a list of dictionaries. Use the CSV sniffer to
            # try and automatically infer the correct CSV format.
            csv_file = open(args.csv, "r")
            dialect = csv.Sniffer().sniff(csv_file.readline(), delimiters=";,")
            csv_file.seek(0)
            rows = [
                row for row in csv.DictReader(
                    csv_file, dialect=dialect, skipinitialspace=True)
            ]
            # Validate input column.
            row_keys = set(rows[0].keys())
            if args.input_column not in row_keys:
                raise ValueError(
                    f"Could not find input column {args.input_column} in CSV. Found keys: {row_keys}"
                )
            textattack.shared.logger.info(
                f"Read {len(rows)} rows from {args.csv}. Found columns {row_keys}."
            )

            augmenter = eval(AUGMENTATION_RECIPE_NAMES[args.recipe])(
                pct_words_to_swap=args.pct_words_to_swap,
                transformations_per_example=args.transformations_per_example,
            )

            output_rows = []
            for row in tqdm.tqdm(rows, desc="Augmenting rows"):
                text_input = row[args.input_column]
                if not args.exclude_original:
                    output_rows.append(row)
                for augmentation in augmenter.augment(text_input):
                    augmented_row = row.copy()
                    augmented_row[args.input_column] = augmentation
                    output_rows.append(augmented_row)
            # Print to file.
            with open(args.outfile, "w") as outfile:
                csv_writer = csv.writer(outfile,
                                        delimiter=",",
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL)
                # Write header.
                csv_writer.writerow(output_rows[0].keys())
                # Write rows.
                for row in output_rows:
                    csv_writer.writerow(row.values())
            textattack.shared.logger.info(
                f"Wrote {len(output_rows)} augmentations to {args.outfile} in {time.time() - start_time}s."
            )
Example #10
    def __parseCsvFile(self, handle):
        """
        Parse a CSV file. Does not reset the file handle to start.

        @arg handle: CSV file. Must be a seekable binary file object.
        @type handle: file object

        @return: list of lists
        @rtype: list
        """
        buf = handle.read(BUFFER_SIZE)
        result = chardet.detect(buf)
        handle.seek(0)

        if result['confidence'] > 0.5:
            encoding = unicode(result['encoding'])
        else:
            encoding = 'utf-8'

        # Python 2.7 makes it extraordinarily hard to do this correctly. We
        # have a binary file object containing lines of text in a certain
        # encoding with unknown style of line-endings.
        #
        # We want to correctly decode the file contents, accept any style of
        # line-endings, parse the lines with the `csv` module, and return
        # unicode strings.
        #
        # 1. `codecs.getreader` does not have a universal newlines mode.
        # 2. `io.TextIOWrapper` cannot be wrapped around our file object,
        #    since it is required to be an `io.BufferedIOBase`, which it
        #    usually will not be.
        # 3. The `csv` module cannot read unicode.
        #
        # Ugh.
        #
        # So, we use a stream wrapper that consumes byte strings, decodes to
        # unicode, normalises newlines, and produces the result UTF-8 encoded.
        # That's what we feed the `csv` module. We decode what it gives back
        # to unicode strings. What a mess.
        handle = _UniversalNewlinesByteStreamIter(handle,
                                                  encoding=encoding,
                                                  buffer_size=BUFFER_SIZE)

        try:
            buf = handle.read(BUFFER_SIZE)
        except UnicodeDecodeError:
            self.__output.addMessage(
                __file__, 3, 'EBPARSE',
                'Could not decode file (using %s encoding).' % encoding)
            return None

        # Default dialect
        dialect = 'excel'

        # The idea is that for new-style batch input files we have only
        # one column and the sniffer cannot find a delimiter.

        try:
            # Todo: delimiters in config file
            dialect = csv.Sniffer().sniff(buf, delimiters="\t ;|,")
            dialect.skipinitialspace = True
        except csv.Error:
            #self.__output.addMessage(__file__, 4, "EBPARSE", e)
            #return None
            pass
        #except

        #Watch out for : delimiter FIXME and for the . delimiter
#        if dialect.delimiter == ":":
#            dialect.delimiter = "\t"

        handle.seek(0)
        reader = csv.reader(handle, dialect)

        ret = []
        try:
            for i in reader:
                ret.append([c.decode('utf-8') for c in i])
        except UnicodeDecodeError:
            self.__output.addMessage(
                __file__, 3, 'EBPARSE',
                'Could not decode file (using %s encoding).' % encoding)
            return None

        return ret
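
For context, a rough sketch of what a wrapper like _UniversalNewlinesByteStreamIter might look like, under the assumptions spelled out in the comments above (this is an illustration, not the original implementation):

class UniversalNewlinesByteStreamIter(object):
    """Consume byte strings, decode them with the detected encoding,
    normalise CR LF and bare CR line endings to LF, and hand UTF-8
    encoded data back to the csv module."""

    def __init__(self, stream, encoding='utf-8', buffer_size=4096):
        self._stream = stream
        self._encoding = encoding
        self._buffer_size = buffer_size

    def seek(self, pos):
        # The parser above only ever rewinds to the beginning of the file.
        self._stream.seek(pos)

    def read(self, size=-1):
        # Used for the initial sniffing read; may raise UnicodeDecodeError,
        # which the caller handles.
        text = self._stream.read(size).decode(self._encoding)
        return text.replace('\r\n', '\n').replace('\r', '\n').encode('utf-8')

    def __iter__(self):
        # Decode the remaining content and yield normalised, UTF-8 encoded
        # lines; the real class streams chunk by chunk instead.
        text = self._stream.read().decode(self._encoding)
        for line in text.splitlines(True):
            yield line.replace('\r\n', '\n').replace('\r', '\n').encode('utf-8')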
Example #11
total_votes = 0
vote_khan = 0
khan_per = 0.000
vote_correy = 0
correy_per = 0.000
vote_li = 0
li_per = 0.000
vote_tooley = 0
tooley_per = 0.000
vote_dict = {}

with open(csvpath, newline="") as csvfile:
    # has_header() needs a sample of the file text; referencing the bound
    # method without calling it is always truthy and never inspects the data.
    sample = csvfile.read(1024)
    csvfile.seek(0)
    csvreader = csv.reader(csvfile, delimiter=',')

    if csv.Sniffer().has_header(sample):
        next(csvreader)

    for row in csvreader:

        if row[2] == "Khan":
            vote_khan += 1

        if row[2] == "Correy":
            vote_correy += 1

        if row[2] == "Li":
            vote_li += 1

        if row[2] == "O'Tooley":
            vote_tooley += 1
Example #12
    def _load_simple_text_file(file, time_col=0, id_col=None, remove_negative_ids=False, valid_filter=None,
                               crowd_ignore_filter=None, convert_filter=None, is_zipped=False, zip_file=None,
                               force_delimiters=None):
        """ Function that loads data which is in a commonly used text file format.
        Assumes each det is given by one row of a text file.
        There is no limit to the number or meaning of each column,
        however one column needs to give the timestep of each det (time_col) which is default col 0.

        The file dialect (delimiter, num cols, etc.) is determined automatically.
        This function automatically separates dets by timestep,
        and is much faster than alternatives such as np.loadtxt or pandas.

        If remove_negative_ids is True and id_col is not None, dets with negative values in id_col are excluded.
        These are not excluded from ignore data.

        valid_filter can be used to only include certain classes.
        It is a dict with ints as keys, and lists as values,
        such that a row is included if "row[key].lower() is in value" for all key/value pairs in the dict.
        If None, all classes are included.

        crowd_ignore_filter can be used to read crowd_ignore regions separately. It has the same format as valid filter.

        convert_filter can be used to convert value read to another format.
        This is used most commonly to convert classes given as string to a class id.
        This is a dict such that the key is the column to convert, and the value is another dict giving the mapping.

        Optionally, input files could be a zip of multiple text files for storage efficiency.

        Returns read_data and ignore_data.
        Each is a dict (keyed by timestep as a string) of lists (over dets) of lists (over column values).
        Note that all data is returned as strings, and must be converted to float/int later if needed.
        Note that timesteps will not be present in the returned dict keys if there are no dets for them
        """

        if remove_negative_ids and id_col is None:
            raise TrackEvalException('remove_negative_ids is True, but id_col is not given.')
        if crowd_ignore_filter is None:
            crowd_ignore_filter = {}
        if convert_filter is None:
            convert_filter = {}
        try:
            if is_zipped:  # Either open file directly or within a zip.
                if zip_file is None:
                    raise TrackEvalException('is_zipped set to True, but no zip_file is given.')
                archive = zipfile.ZipFile(os.path.join(zip_file), 'r')
                fp = io.TextIOWrapper(archive.open(file, 'r'))
            else:
                fp = open(file)
            read_data = {}
            crowd_ignore_data = {}
            fp.seek(0, os.SEEK_END)
            # check if file is empty
            if fp.tell():
                fp.seek(0)
                dialect = csv.Sniffer().sniff(fp.readline(), delimiters=force_delimiters)  # Auto determine structure.
                dialect.skipinitialspace = True  # Deal with extra spaces between columns
                fp.seek(0)
                reader = csv.reader(fp, dialect)
                for row in reader:
                    try:
                        # Deal with extra trailing spaces at the end of rows
                        if row[-1] in '':
                            row = row[:-1]
                        timestep = str(int(float(row[time_col])))
                        # Read ignore regions separately.
                        is_ignored = False
                        for ignore_key, ignore_value in crowd_ignore_filter.items():
                            if row[ignore_key].lower() in ignore_value:
                                # Convert values in one column (e.g. string to id)
                                for convert_key, convert_value in convert_filter.items():
                                    row[convert_key] = convert_value[row[convert_key].lower()]
                                # Save data separated by timestep.
                                if timestep in crowd_ignore_data.keys():
                                    crowd_ignore_data[timestep].append(row)
                                else:
                                    crowd_ignore_data[timestep] = [row]
                                is_ignored = True
                        if is_ignored:  # if det is an ignore region, it cannot be a normal det.
                            continue
                        # Exclude some dets if not valid.
                        if valid_filter is not None:
                            for key, value in valid_filter.items():
                                if row[key].lower() not in value:
                                    continue
                        if remove_negative_ids:
                            if int(float(row[id_col])) < 0:
                                continue
                        # Convert values in one column (e.g. string to id)
                        for convert_key, convert_value in convert_filter.items():
                            row[convert_key] = convert_value[row[convert_key].lower()]
                        # Save data separated by timestep.
                        if timestep in read_data.keys():
                            read_data[timestep].append(row)
                        else:
                            read_data[timestep] = [row]
                    except Exception:
                        exc_str_init = 'In file %s the following line cannot be read correctly: \n' % os.path.basename(
                            file)
                        exc_str = ' '.join([exc_str_init]+row)
                        raise TrackEvalException(exc_str)
            fp.close()
        except Exception:
            print('Error loading file: %s, printing traceback.' % file)
            traceback.print_exc()
            raise TrackEvalException(
                'File %s cannot be read because it is either not present or invalidly formatted' % os.path.basename(
                    file))
        return read_data, crowd_ignore_data
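
A hedged usage sketch for the loader above (in the original project this is a method on a dataset class; the file name, column layout and class labels here are illustrative assumptions):

# Column 0 holds the timestep, column 1 the object id, column 7 a class label.
read_data, ignore_data = _load_simple_text_file(
    'dets.txt',
    time_col=0,
    id_col=1,
    remove_negative_ids=True,
    valid_filter={7: ['car', 'pedestrian']},   # keep only these classes
    crowd_ignore_filter={7: ['ignore']},       # read ignore regions separately
    convert_filter={7: {'car': '1', 'pedestrian': '2', 'ignore': '0'}},
)
for timestep, dets in read_data.items():
    print(timestep, len(dets))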
Example #13
def csvreader(reader, rep, chemin, fichier, entete=None, separ=None):
    reader.prepare_lecture_fichier(rep, chemin, fichier)
    logger = reader.regle_ref.stock_param.logger
    if separ is None:
        separ = reader.separ
    # nom_schema, nom_groupe, nom_classe = getnoms(rep, chemin, fichier)
    nbwarn = 0
    # print(" lecture_csv, separ:", len(separ), separ, "<>", reader.encoding)
    with open(
        os.path.join(rep, chemin, fichier), newline="", encoding=reader.encoding
    ) as csvfile:
        sample = csvfile.read(4094)
        try:
            dialect = csv.Sniffer().sniff(sample, delimiters=separ)
        except csv.Error:
            logger.warning("erreur determination dialecte csv, parametres par defaut")
            dref = csv.get_dialect("excel")
            linesep = "\r\n" if "\r\n" in sample else "\n"
            has_header = False
            if sample.startswith("!"):
                hline = sample.split(linesep, 1)[0]
                entete = hline[1:].split(separ)
                csvfile.seek(0)
                has_header = True
            csv.register_dialect(
                "special", dref, delimiter=separ, lineterminator=linesep
            )
            dialect = csv.get_dialect("special")
            lecteur = csv.DictReader(csvfile, dialect=dialect)
            if has_header:
                lecteur.__next__()

        if entete is None:
            has_header = csv.Sniffer().has_header(sample) or sample.startswith("!")
            csvfile.seek(0)
            lecteur = csv.DictReader(csvfile, dialect=dialect)
            if has_header:
                entete = [
                    i.replace(" ", "_").replace("!", "") for i in lecteur.fieldnames
                ]
            else:
                entete = ["champ_" + str(i) for i in range(len(lecteur.fieldnames))]

        if entete[-1] == "tgeom" or entete[-1] == "geometrie":
            entete[-1] = "#geom"

        lecteur = csv.DictReader(
            csvfile, fieldnames=entete, dialect=dialect, restval="", restkey="#reste"
        )
        # print("entete csv", entete, dialect.delimiter)
        if reader.newschema:
            for i in entete:
                if i[0] != "#":
                    reader.schemaclasse.stocke_attribut(i, "T")
        reader.prepare_attlist(entete)
        # print("attlist", reader.attlist)
        type_geom = "-1" if entete[-1] == "#geom" else "0"
        reader.fixe["#type_geom"] = type_geom
        for attributs in lecteur:
            obj = reader.getobj(attributs=attributs)
            # print(" recup objet", obj)
            if obj is None:
                continue  # entry filtered out
            reader.process(obj)
Example #14
def convert(filepath_or_fileobj,
            dbpath,
            table,
            headerspath_or_fileobj=None,
            compression=None,
            typespath_or_fileobj=None):
    if isinstance(filepath_or_fileobj, string_types):
        if compression is None:
            fo = open(filepath_or_fileobj, mode=read_mode)
        elif compression == 'bz2':
            try:
                fo = bz2.open(filepath_or_fileobj, mode=read_mode)
            except AttributeError:
                fo = bz2.BZ2File(filepath_or_fileobj, mode='r')
        elif compression == 'gzip':
            fo = gzip.open(filepath_or_fileobj, mode=read_mode)
    else:
        fo = filepath_or_fileobj

    try:
        dialect = csv.Sniffer().sniff(fo.readline())
    except TypeError:
        dialect = csv.Sniffer().sniff(str(fo.readline()))
    fo.seek(0)

    # get the headers
    header_given = headerspath_or_fileobj is not None
    if header_given:
        if isinstance(headerspath_or_fileobj, string_types):
            ho = open(headerspath_or_fileobj, mode=read_mode)
        else:
            ho = headerspath_or_fileobj
        header_reader = csv.reader(ho, dialect, delimiter='\t')
        headers = [header.strip() for header in next(header_reader)]
        ho.close()
    else:
        reader = csv.reader(fo, dialect, delimiter='\t')
        headers = [header.strip() for header in next(reader)]
        fo.seek(0)

    # get the types
    if typespath_or_fileobj is not None:
        if isinstance(typespath_or_fileobj, string_types):
            to = open(typespath_or_fileobj, mode=read_mode)
        else:
            to = typespath_or_fileobj
        type_reader = csv.reader(to, dialect, delimiter='\t')
        types = [_type.strip() for _type in next(type_reader)]
        to.close()
    else:
        # guess types
        type_reader = csv.reader(fo, dialect, delimiter='\t')
        if not header_given: next(type_reader)
        types = _guess_types(type_reader, len(headers))
        fo.seek(0)

    # now load data
    _columns = ','.join([
        '"%s" %s' % (header, _type) for (header, _type) in zip(headers, types)
    ])

    reader = csv.reader(fo, dialect, delimiter='\t')
    if not header_given:  # Skip the header
        next(reader)

    conn = sqlite3.connect(dbpath)
    # shz: fix error with non-ASCII input
    conn.text_factory = str
    c = conn.cursor()

    try:
        create_query = 'CREATE TABLE %s (%s)' % (table, _columns)
        c.execute(create_query)
    except:
        pass

    _insert_tmpl = 'INSERT INTO %s VALUES (%s)' % (table, ','.join(
        ['?'] * len(headers)))

    line = 0
    for row in reader:
        line += 1
        if len(row) == 0:
            continue
        # we need to take out commas from int and floats for sqlite to
        # recognize them properly ...
        try:
            row = [
                None if x == '' else float(x.replace(',', ''))
                if y == 'real' else int(x) if y == 'integer' else x
                for (x, y) in zip(row, types)
            ]
            c.execute(_insert_tmpl, row)
        except ValueError as e:
            print("Unable to convert value '%s' to type '%s' on line %d" %
                  (x, y, line),
                  file=sys.stderr)
        except Exception as e:
            print("Error on line %d: %s" % (line, e), file=sys.stderr)

    conn.commit()
    c.close()
Example #15
# into dialect class objects.

print(f'The following dialects are available by default: {csv.list_dialects()}')

# But by configuring all the parameters manually, you can also create your own dialect.
# You can read more about this here:
# https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters

# What should you do if you receive a file from an external source, but don't know
# in advance which dialect parameters were used in it?
# The csv.Sniffer() class solves this problem.
# Its sniff() method lets you reconstruct the parameters of the dialect
# in use from a sample line:

with open('external_data/Indiana_stash.csv', newline='') as csvfile:
    dialect = csv.Sniffer().sniff(csvfile.readline(), [',', ';'])
    csvfile.seek(0)
    reader = csv.reader(csvfile, dialect)
    print(dialect)  # <class 'csv.Sniffer.sniff.<locals>.dialect'>
    print(reader)  # <_csv.reader object at 0x018CFD30>

# By passing the detected parameters to the reader class,
# we can use it to read the information from the file correctly.

# Example:
# Indiana, getting ready for his next adventure, has to send his university's
# accounting department data on tool purchases for the upcoming archaeological expedition.
# As a starting point he took the standard list of tools:

standart_need_list = []
Example #16
    def ReadCSV(self):
        '''
        *    Before looking at the SVG document, parse the CSV data. 
        *    Count the rows
        *    Initialize a dictionary with column header names as keys
            and values from the first row to be merged.
        '''

        #         if self.skipMerge:
        #             return

        # Read CSV file path, stored in a custom "MergeData" XML element
        self.csv_data_read = False
        CSVfile = None
        csvNode = None
        fileName = None

        for node in self.svg:
            if node.tag == 'svg':
                for subNode in self.svg:
                    if subNode.tag == inkex.addNS(
                            'MergeData', 'svg') or subNode.tag == 'MergeData':
                        fileName = subNode.text.encode('utf-8')
            elif node.tag == inkex.addNS('MergeData',
                                         'svg') or node.tag == 'MergeData':
                fileName = node.text.encode('utf-8')

        if fileName is not None:
            self.csv_file_path = fileName

        if self.skipMerge:
            return

        if fileName is None:
            inkex.errormsg(
                "No CSV file name selected. Use Data tab to select a CSV file."
            )
            return
        else:

            # inkex.errormsg( "File: " + str(filename))
            try:
                CSVfile = open(fileName.decode('utf-8'), 'rU')
            except:
                inkex.errormsg(
                    "CSV data file not found. Use Data tab to select a CSV file."
                )

            if CSVfile is not None:
                try:
                    CSVfile = CSVfile.read()
                except:
                    inkex.errormsg(
                        "No CSV data found in file. Use Data tab to select a CSV file."
                    )

        if CSVfile is None:
            return

        CSVfile = '\n'.join(CSVfile.splitlines())

        dialect_read = csv.Sniffer().sniff(StringIO(CSVfile).readline())

        dialect_read.doublequote = True  # Force two quotes ("") to be read as an escaped quotation mark.
        # This is a hack for excel compatibility; it may cause issues with some less common encodings.

        self.reader = csv.reader(StringIO(CSVfile), dialect_read)

        self.csv_row_count = sum(
            1 for row in self.reader) - 1  # Subtract 1 for header row

        # This count exhausts the reader by iteration; we need to reset the reader:
        self.reader = csv.DictReader(StringIO(CSVfile), dialect=dialect_read)

        if (self.csv_row_count < self.options.last_row):
            self.options.last_row = self.csv_row_count  # Limit last row of data to end of CSV file

        if (self.row_to_plot > self.csv_row_count):
            return

        # Initialize dictionary for _first row_ that we'll be merging.
        # This may not be the first data row in the file, depending on which the user has selected.

        currentRow = 1
        row = next(self.reader)
        if (self.row_to_plot <=
                self.options.last_row):  # If we are merging any rows,
            while (currentRow < self.row_to_plot
                   ):  # If we are not at the first row to merge
                row = next(self.reader)
                currentRow += 1

        self.rowDictionary = {}  # Initialize the row dictionary
        for item in self.reader.fieldnames:
            safe_text = row[item].replace('&', '&amp;')
            safe_text = safe_text.replace('<', '&lt;')
            safe_text = safe_text.replace('>', '&gt;')
            safe_text = safe_text.replace('"', '&quot;')
            safe_text = safe_text.replace("'", '&apos;')

            if self.remove_blank_rows and safe_text == "":
                self.rowDictionary['{{' + item + '}}'] = '[[EMPTY]]'
            else:
                self.rowDictionary['{{' + item + '}}'] = safe_text
Example #17
import argparse
import csv

parser = argparse.ArgumentParser()
parser.add_argument("input_file", 
					help = "location/name of input file")
parser.add_argument("output_file", 
					help = "location/name of output file")
parser.add_argument("--in-delimiter", 
					action = 'store',
					default = None)
parser.add_argument("--in-quote",
					action = 'store',
					default = None)
args = parser.parse_args()

with open(args.input_file, newline = '') as in_csv:
	# If delimiter or quote not specified determine it using Sniffer:
	if args.in_delimiter == None or args.in_quote == None:
		dialect = csv.Sniffer().sniff(in_csv.read(1024))
		if args.in_delimiter == None:
			args.in_delimiter = dialect.delimiter
		if args.in_quote == None:
			args.in_quote = dialect.quotechar
		in_csv.seek(0)
	in_reader = csv.reader(in_csv, delimiter=args.in_delimiter, 
							quotechar = args.in_quote)
	
	with open(args.output_file, 'w', newline = '') as out_csv:
		out_writer = csv.writer(out_csv, delimiter = ",")
		for row in in_reader:
			out_writer.writerow(row)
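
With the sniffing above, a typical invocation would look like `python this_script.py input.csv output.csv --in-delimiter ";"` (the script name is illustrative); whichever of --in-delimiter and --in-quote is omitted is filled in from the dialect sniffed from the first 1024 bytes of the input.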
Example #18
    def effect(self):
        '''Main entry point: check to see which mode/tab is selected, and act accordingly.'''

        self.versionString = "AxiDraw Merge v 2.5.3 dated 2019-06-11"
        self.spewDebugdata = False

        self.start_time = time.time()

        self.remove_blank_rows = True

        self.pageDelays = 0.0
        self.rows_plotted = 0

        self.serial_port = None
        self.svg_data_written = False
        self.csv_data_read = False
        self.csv_data_written = False
        self.csv_file_path = None
        self.csv_row_count = None

        self.delay_between_rows = False  # Not currently delaying between copies
        self.b_stopped = False  # Not currently stopped by button press

        #Values to be read from file:
        self.svg_rand_seed_Old = float(1.0)
        self.svg_row_old = float(0.0)  # Last row plotted.
        self.svg_rand_seed = float(1.0)
        self.svgRow = int(1)

        self.row_to_plot = 1

        skipSerial = False
        if self.options.preview:
            skipSerial = True

        # Input sanitization:
        self.options.mode = self.options.mode.strip("\"")
        self.options.single_type = self.options.single_type.strip("\"")
        self.options.data_action = self.options.data_action.strip("\"")
        self.options.fontface = self.options.fontface.strip("\"")
        self.options.otherfont = self.options.otherfont.strip("\"")
        self.options.setup_type = self.options.setup_type.strip("\"")
        self.options.resume_type = self.options.resume_type.strip("\"")

        if self.options.page_delay < 0:
            self.options.page_delay = 0

        if self.options.mode == "model":
            return
        if self.options.mode == "options":
            return
        if self.options.mode == "timing":
            return
        if self.options.mode == "csv":
            skipSerial = True
        if self.options.mode == "text":
            return
        if self.options.mode == "version":
            #             inkex.errormsg( gettext.gettext(self.versionString)) # Accessible from CLI only
            return

        import axidraw  # https://github.com/evil-mad/axidraw
        import hershey_advanced

        ad = axidraw.AxiDraw()
        hta = hershey_advanced.HersheyAdv()

        ad.getoptions([])
        self.svg = self.document.getroot()
        ad.ReadWCBdata(self.svg)
        self.svg_row_old = ad.svg_row_old  # Access params from ReadWCBdata

        ad.called_externally = True

        if self.options.mode == "singlePlot":
            if self.options.single_type == "queryRow":
                # No plotting; Query and report last row plotted
                inkex.errormsg('Last row merged: Row number ' +
                               str(int(self.svg_row_old)))
                inkex.errormsg('Next row to merge: Row number ' +
                               str(int(self.svg_row_old + 1)))
                return

        if skipSerial == False:
            self.serial_port = ebb_serial.openPort()
            if self.serial_port is None:
                inkex.errormsg(
                    gettext.gettext("Failed to connect to AxiDraw. :("))
                return

        self.skipMerge = True
        if self.options.mode == "autoPlot" or self.options.mode == "singlePlot"\
            or self.options.mode == "resume":
            self.xmlstr = etree.tostring(self.document,
                                         encoding='utf8',
                                         method='xml')
            if ('{{' in self.xmlstr) and ('}}' in self.xmlstr):
                self.skipMerge = False

        if self.options.mode == "autoPlot":

            pen_down_travel_inches = 0.0  # Local variable
            pen_up_travel_inches = 0.0  # Local variable
            pt_estimate = 0.0  # Local variable
            continue_plotting = True

            self.row_to_plot = int(self.options.first_row)

            if (self.options.last_row == 0
                ):  # "Continue until last row of data"
                self.options.last_row = 10000  # A large number; only limit by size of data.

            self.ReadCSV()
            if (self.csv_row_count is not None) or self.skipMerge:

                if (self.row_to_plot >
                        self.csv_row_count) and not self.skipMerge:
                    inkex.errormsg(
                        gettext.gettext(
                            "No merge data found in specified range of rows."))
                    #self.row_to_plot = ad.svg_row_old
                    continue_plotting = False

                if (self.options.last_row < self.options.first_row):
                    continue_plotting = False
                    inkex.errormsg('Nothing to plot; No data rows selected.')

                if (continue_plotting):
                    ad.backup_original = deepcopy(self.original_document)

                    while (continue_plotting):
                        self.svg_rand_seed = round(
                            time.time() *
                            100) / 100  # New random seed for new plot
                        self.mergeAndPlot(hta, ad)

                        if self.spewDebugdata:
                            inkex.errormsg('Merging row number ' +
                                           str(int(self.row_to_plot)) + '.')

                        pen_down_travel_inches = pen_down_travel_inches + ad.pen_down_travel_inches  # Local copy
                        pen_up_travel_inches = pen_up_travel_inches + ad.pen_up_travel_inches  # Local copy
                        pt_estimate = pt_estimate + ad.pt_estimate  # Local copy

                        if (
                                ad.b_stopped
                        ):  # A pause was triggered while plotting the previous row.
                            inkex.errormsg(
                                'Paused while plotting row number ' +
                                str(int(self.row_to_plot)) + '.')
                            continue_plotting = False
                        else:  # Finished plotting the row without being paused

                            self.row_to_plot = self.row_to_plot + 1

                            if (self.row_to_plot > self.options.last_row):
                                continue_plotting = False  # We have already finished the last row.
                            else:  # We will be plotting at least one more row. Delay first.
                                self.next_csv_row()
                                self.delay_between_rows = True  # Indicate that we are currently delaying between copies
                                timeCounter = 10 * self.options.page_delay  # 100 ms units
                                if self.spewDebugdata:
                                    inkex.errormsg(
                                        'Delaying ' +
                                        str(int(self.options.page_delay)) +
                                        ' seconds.')
                                while (timeCounter > 0):
                                    timeCounter = timeCounter - 1
                                    if (self.b_stopped == False):
                                        if self.options.preview:
                                            pt_estimate += 100
                                            self.pageDelays += 0.1
                                        else:
                                            time.sleep(
                                                0.100
                                            )  # Use short intervals to improve responsiveness
                                            self.PauseCheck(
                                            )  #Query if button pressed
                                self.delay_between_rows = False  # Not currently delaying between copies
                                if (self.b_stopped == True
                                    ):  # if button pressed
                                    self.row_to_plot = self.row_to_plot - 1  # Backtrack; we didn't actually get to that row.
                                    inkex.errormsg( 'Sequence halted after row number ' +\
                                        str(int(self.row_to_plot))  + '.')
                                    continue_plotting = False  # Cancel plotting sequence

                    ad.pen_down_travel_inches = pen_down_travel_inches  # Copy local values back to ad.(values)
                    ad.pen_up_travel_inches = pen_up_travel_inches  #  for printing time report.
                    ad.pt_estimate = pt_estimate
                    self.printTimeReport(ad)

        elif self.options.mode == "singlePlot":

            doPlot = True

            if self.options.single_type == "singleFix":  # Plot a specified row
                self.row_to_plot = int(self.options.single_row)
            elif self.options.single_type == "singleAdv":  # Automatically advance
                self.row_to_plot = int(self.svg_row_old + 1)
            else:
                doPlot = False

            if doPlot:
                self.svg_rand_seed = round(
                    time.time() * 100) / 100  # New random seed for new plot
                self.options.last_row = self.row_to_plot  # Last row is equal to first row, in this case.
                self.ReadCSV()
                if (self.csv_row_count is not None) or self.skipMerge:
                    if (self.row_to_plot >
                            self.csv_row_count) and not self.skipMerge:
                        inkex.errormsg( gettext.gettext( \
                            "No merge data found in row number " ) + str(self.row_to_plot) + '.')
                        #self.row_to_plot = ad.svg_row_old
                    else:
                        ad.backup_original = deepcopy(self.original_document)
                        self.mergeAndPlot(hta, ad)
                        self.printTimeReport(ad)

        elif self.options.mode == "resume":

            ad.options.mode = "resume"
            self.svg_rand_seed = ad.svg_rand_seed_old  # Preserve random seed
            self.row_to_plot = self.svg_row_old  # Preserve SVG Row
            ad.options.resume_type = self.options.resume_type

            if self.options.resume_type == "home":
                self.options.fontface = "none"  # Disable Hershey Text substegitution
                self.mergeAndPlot(hta, ad)
            elif ad.svg_application_old != "Axidraw Merge":
                inkex.errormsg(
                    gettext.gettext(
                        "No AxiDraw Merge resume data found in file."))
            elif ad.svg_layer_old == 12345:  # There appears to be a paused "all layers" plot
                self.options.last_row = self.row_to_plot
                self.ReadCSV()

                if (self.csv_row_count is not None) or self.skipMerge:
                    ad.backup_original = deepcopy(self.original_document)
                    self.mergeAndPlot(hta, ad)
                    self.printTimeReport(ad)
            else:
                inkex.errormsg(
                    gettext.gettext(
                        "No in-progress plot data found saved in file."))

        elif self.options.mode == "setup":

            if self.options.preview:
                inkex.errormsg(
                    gettext.gettext(
                        'Command unavailable while in preview mode.'))
            else:

                ad.options.mode = self.options.mode
                ad.options.setup_type = self.options.setup_type

                ad.options.pen_pos_up = self.options.pen_pos_up
                ad.options.pen_pos_down = self.options.pen_pos_down
                ad.document = self.document
                ad.options.port = self.serial_port
                ad.effect()

        elif self.options.mode == "csv":

            if self.options.data_action == "choose":
                # Select and upload a CSV file

                useGTK = False
                filename = None

                try:
                    import pygtk
                    pygtk.require('2.0')
                    import gtk  # Use gtk to create file selection dialog box.
                    useGTK = True
                except:
                    pass

                if useGTK:
                    dialog = gtk.FileChooserDialog(\
                        title="Please choose a CSV file",\
                        action=gtk.FILE_CHOOSER_ACTION_OPEN,\
                        buttons=(gtk.STOCK_CANCEL,gtk.RESPONSE_CANCEL,\
                        gtk.STOCK_OPEN,gtk.RESPONSE_OK))

                    dialog.set_default_response(gtk.RESPONSE_OK)

                    filter = gtk.FileFilter()
                    filter.set_name("Text/CSV")
                    filter.add_pattern("*.CSV")
                    filter.add_pattern("*.csv")
                    filter.add_pattern("*.txt")
                    filter.add_pattern("*.TXT")
                    filter.add_mime_type("text/csv")
                    filter.add_mime_type("text/plain")
                    filter.add_mime_type("application/csv")
                    filter.add_mime_type("application/x-csv")
                    filter.add_mime_type("text/x-csv")
                    filter.add_mime_type("text/csv")
                    filter.add_mime_type("text/comma-separated-values")
                    filter.add_mime_type("text/x-comma-separated-values")
                    filter.add_mime_type("text/tab-separated-values")
                    dialog.add_filter(filter)
                    filter = gtk.FileFilter()
                    filter.set_name("All files")
                    filter.add_pattern("*")
                    dialog.add_filter(filter)

                    response = dialog.run()

                    if response == gtk.RESPONSE_OK:
                        filename = dialog.get_filename()
                        #inkex.errormsg( "File selected: " + filename) # Print full path
                        # inkex.errormsg( "Selected file: " + str(os.path.basename(filename))) # Print file name
                    elif response == gtk.RESPONSE_CANCEL:
                        inkex.errormsg(
                            gettext.gettext('No CSV file selected.'))
                    filter.destroy()
                    dialog.destroy()

                else:  # i.e., if not useGTK. Try Tkinter,
                    useTK = False
                    try:
                        import Tkinter
                        import tkFileDialog
                        useTK = True
                    except:
                        inkex.errormsg(
                            "Unable to load TK or GTK. Please contact technical support for assistance."
                        )

                    if useTK:
                        Tkinter.Tk().withdraw()  # Close the root window
                        filename = tkFileDialog.askopenfilename(
                            title="Select CSV File")
                        if filename == "":
                            inkex.errormsg(
                                gettext.gettext('No CSV file selected.'))
                            filename = None

                if filename is not None:

                    CSVfile = open(filename, 'rU')
                    try:
                        dialect_read = csv.Sniffer().sniff(CSVfile.readline())
                    except:
                        dialect_read = None
                        inkex.errormsg( "Unable to determine format of selected file, " \
                                + str(os.path.basename(filename)) ) # Print file name

                    if dialect_read is None:
                        CSVfile.close()
                    else:
                        CSVfile.seek(0)  # Rewind file to beginning

                        reader = csv.reader(CSVfile, dialect=dialect_read)
                        CSVrowCount = sum(
                            1
                            for row in reader) - 1  # Subtract 1 for header row
                        CSVfile.seek(0)

                        if (CSVrowCount > 0):
                            CSVfile = open(filename, 'rU')
                            reader = csv.DictReader(CSVfile,
                                                    dialect=dialect_read)

                            fileName_basename = os.path.basename(
                                filename).encode('utf-8')

                            inkex.errormsg( "Found " + str(CSVrowCount) + " Rows of merge data in file " \
                                        + fileName_basename) # Print file name

                            key_names = "Column names: "
                            for item in reader.fieldnames:
                                key_names = key_names + "{{" + item + "}}, "
                            key_names = key_names[:
                                                  -2]  # drop last two characters from string (", ")
                            inkex.errormsg(key_names)  # Print key list

                            self.csv_file_path = filename.encode(
                                'utf-8')  # Path & Name of the file
                            self.storeCSVpath(
                                self.svg
                            )  # Store path & name file in our SVG file.
                        else:
                            inkex.errormsg(
                                "Unable to interpret selected file " +
                                str(os.path.basename(filename)) + ".")
                        CSVfile.close()

            elif self.options.data_action == "view":
                self.csv_data_read = False
                CSVfile = None
                csvNode = None
                for node in self.svg:
                    if node.tag == 'svg':
                        for subNode in self.svg:
                            if subNode.tag == inkex.addNS(
                                    'MergeData',
                                    'svg') or subNode.tag == 'MergeData':
                                csvNode = subNode
                    elif node.tag == inkex.addNS(
                            'MergeData', 'svg') or node.tag == 'MergeData':
                        csvNode = node
                if csvNode is not None:
                    try:
                        CSVfile = csvNode.text
                        self.csv_data_read = True
                    except:
                        self.svg.remove(
                            csvNode
                        )  # An error before this point leaves csvDataRead as False.

                if CSVfile is None:
                    inkex.errormsg(
                        "No CSV data found in file. Please select and load a CSV file."
                    )
                    return
                else:
                    inkex.errormsg("The selected CSV data file is:")
                    inkex.errormsg(CSVfile)

        if self.serial_port is not None:
            ebb_motion.doTimedPause(
                self.serial_port,
                10)  #Pause a moment for underway commands to finish...
            ebb_serial.closePort(self.serial_port)
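
The merge check earlier in this example looks for '{{' and '}}' markers in the SVG text, and the CSV "choose" branch reports each column name wrapped the same way. A minimal sketch of that placeholder convention, with a made-up column name, row, and template string (an illustration only, not the extension's actual mergeAndPlot code):

# Hypothetical illustration of {{column}} placeholder substitution; all values are made up.
row = {'Name': 'Ada'}                    # one row as produced by csv.DictReader
template_text = 'Hello, {{Name}}!'       # text as it might appear in the SVG document
merged = template_text
for key, value in row.items():
    merged = merged.replace('{{' + key + '}}', value)
print(merged)  # prints: Hello, Ada!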
Example #19
0
#!/usr/bin/env python

import csv

infile = "/home/pzs/histone/wigglefiles/2210_02_K4Me1.wig.txt"

fh = open(infile, "rb")

line = fh.readline()
while not line.startswith("chr"):
    line = fh.readline()

dialect = csv.Sniffer().sniff(fh.read(1024))
fh.seek(0)
print dir(dialect)
reader = csv.reader(fh, dialect)

for line in reader:
    print line
Example #20
0
    pRanWoman = 0
    pNotRanMale = 0
    pNotRanWoman = 0
    pRanHadMarathonExperience = 0
    pRanHadNoMarathonExperience = 0
    pNotRanHadMarathonExperience = 0
    pNotRanHadNoMarathonExperience = 0
    pRanMarathonIn2014 = 0
    pRanNoMarathonIn2014 = 0
    pNotRanMarathonIn2014 = 0
    pNotRanNoMarathonIn2014 = 0
    pRanHalfMarathonIn2014 = 0
    pRanNoHalfMarathonIn2014 = 0
    pNotRanHalfMarathonIn2014 = 0
    pNotRanNoHalfMarathonIn2014 = 0
    dialect = csv.Sniffer().sniff(csvfile.read(1024))

    csvfile.seek(0)
    reader = csv.reader(csvfile, dialect)
    for row in reader:

        if row[9] == '1':
            pRan = pRan + 1
            if row[2] == '0':
                pRanWoman = pRanWoman + 1
            else:
                pRanMale = pRanMale + 1
            if int(row[1]) > 0:
                pRanHadMarathonExperience = pRanHadMarathonExperience + 1
            else:
                pRanHadNoMarathonExperience = pRanHadNoMarathonExperience + 1
Example #21
0
def read(data, path, prog_cb):

    file_size = os.stat(path).st_size

    with open(path, mode='rb') as file:

        byts = file.read(4096)
        det  = chardet.detect(byts)
        encoding = det['encoding']
        file.seek(0)

        if encoding == 'ascii':
            encoding = 'utf-8-sig'

        csvfile = TextIOWrapper(file, encoding=encoding, errors='replace')

        try:
            some_data = csvfile.read(4096)
            if len(some_data) == 4096:  # csv sniffer doesn't like partial lines
                some_data = trim_after_last_newline(some_data)
            dialect = csv.Sniffer().sniff(some_data, ', \t;')
        except csv.Error as e:
            log.exception(e)
            dialect = csv.excel

        csvfile.seek(0)
        reader = csv.reader(csvfile, dialect)

        itr = reader.__iter__()
        column_names = itr.__next__()

        column_count = 0
        column_writers = [ ]

        if len(column_names) == 0:
            column_names = ['A']

        for i in range(len(column_names)):
            column_name = column_names[i]
            data.append_column(column_name, column_name)
            column = data[i]
            column.column_type = ColumnType.DATA
            column_writers.append(ColumnWriter(column, i))
            column_count += 1

        row_count = 0

        csvfile.seek(0)
        reader = csv.reader(csvfile, dialect)
        first = True

        for row in reader:
            if first:
                first = False
            else:
                for i in range(column_count):
                    column_writers[i].examine_row(row)

                row_count += 1

            if row_count % 1000 == 0:
                prog_cb(int(33333 * file.tell() / file_size))

        for column_writer in column_writers:
            column_writer.ruminate()

        data.set_row_count(row_count)

        csvfile.seek(0)
        reader = csv.reader(csvfile, dialect)
        first = True

        row_no = 0

        for row in reader:
            if first:
                first = False
            else:
                for i in range(column_count):
                    column_writers[i].parse_row(row, row_no)
                row_no += 1

            if row_no % 1000 == 0:
                prog_cb(int(33333 + 66666 * file.tell() / file_size))
Example #22
0
def csv_has_header(csv_path, sample_size=100):
    """
    Determines if a CSV file has a header
    sniffing the sample_size first rows
    """
    return csv.Sniffer().has_header(get_csv_sample(csv_path))
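
The helper get_csv_sample is not shown in this example (note also that the function above never forwards its own sample_size argument to it). A minimal sketch of what such a helper might look like, assuming it simply returns the first sample_size lines of the file as one string; the name, signature, and behavior are assumptions rather than part of the original snippet:

from itertools import islice

def get_csv_sample(csv_path, sample_size=100):
    # Hypothetical helper: return the first sample_size lines of the file as a single string.
    with open(csv_path, newline='') as f:
        return ''.join(islice(f, sample_size))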
Example #23
0
    with open('sample_output.csv', 'w', newline='') as file_write:
        writer = csv.writer(file_write, delimiter='|', quoting=csv.QUOTE_NONE)

        for row_read in reader:
            row_write = row_read
            row_write[0] = str(row_write[0]).upper()
            if row_write[2] == '':
                row_write[2] = '1900'

            writer.writerow(row_write)
'''
# Header Record: With
with open('sample_input.csv', 'r', newline='') as file_read:

    # Check if file has header record
    snf = csv.Sniffer().has_header(file_read.read(100))
    print('Has Header?', snf)
    file_read.seek(0)

    reader = csv.DictReader(file_read, delimiter=',', quoting=csv.QUOTE_ALL)
    fieldnames = reader.fieldnames

    with open('sample_output.csv', 'w', newline='') as file_write:
        writer = csv.DictWriter(file_write, fieldnames=fieldnames, delimiter='|', quoting=csv.QUOTE_NONE)
        writer.writeheader()

        for row_read in reader:
            row_write = row_read
            row_write['TITLE'] = str(row_write['TITLE']).upper()
            if row_write['YEAR'] == '':
                row_write['YEAR'] = '1900'
Example #24
0
def _sniff_file_info(fname, comment='#', check_header=True, quiet=False):
    """
    Infer number of header rows and delimiter of a file.

    Parameters
    ----------
    fname : string
        CSV file containing the genotype information.
    comment : string, default '#'
        Character that starts a comment row.
    check_header : bool, default True
        If True, check number of header rows, assuming a row
        that begins with a non-digit character is header.
    quiet : bool, default False
        If True, suppress output to screen.

    Returns
    -------
    n_header : int or None
        Number of header rows. None is returned if `check_header`
        is False.
    delimiter : str
        Inferred delimiter
    line : str
        The first line of data in the file.

    Notes
    -----
    .. Valid delimiters are: ['\t', ',', ';', '|', ' ']
    """

    valid_delimiters = ['\t', ',', ';', '|', ' ']

    with open(fname, 'r') as f:
        # Read through comments
        line = f.readline()
        while line != '' and line[0] == comment:
            line = f.readline()

        # Read through header, counting rows
        if check_header:
            n_header = 0
            while line != '' and (not line[0].isdigit()):
                line = f.readline()
                n_header += 1
        else:
            n_header = None

        if line == '':
            delimiter = None
            if not quiet:
                print('Unable to determine delimiter, returning None')
        else:
            # If no tab, comma, ;, |, or space, assume single entry per column
            if not any(d in line for d in valid_delimiters):
                delimiter = None
                if not quiet:
                    print('Unable to determine delimiter, returning None')
            else:
                delimiter = csv.Sniffer().sniff(line).delimiter

    # Return number of header rows and delimiter
    return n_header, delimiter, line
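
A brief usage sketch for the function above; the file name is a placeholder and not part of the original snippet:

# Hypothetical usage of _sniff_file_info; 'genotypes.csv' is a placeholder file name.
n_header, delimiter, first_line = _sniff_file_info('genotypes.csv', comment='#',
                                                   check_header=True, quiet=True)
if delimiter is not None:
    print('header rows:', n_header, 'delimiter:', repr(delimiter))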
Example #25
0
                self.lastval=r
                fulltopic=topic+"status/"+self.topic
                logging.info("Publishing " + fulltopic)
                mqc.publish(fulltopic,self.lastval,qos=0,retain=True)
                self.last = time.time()
        except modbus_tk.modbus.ModbusError as exc:
            logging.error("Error reading "+self.topic+": Slave returned %s - %s", exc, exc.get_exception_code())
        except Exception as exc:
            logging.error("Error reading "+self.topic+": %s", exc)
            

registers=[]

# Now let's read the register definition
with open(args.registers,"r") as csvfile:
    dialect=csv.Sniffer().sniff(csvfile.read(8192))
    csvfile.seek(0)
    defaultrow={"Size":1,"Format":">H","Frequency":60,"Slave":1,"FunctionCode":4}
    reader=csv.DictReader(csvfile,fieldnames=["Topic","Register","Size","Format","Frequency","Slave","FunctionCode"],dialect=dialect)
    for row in reader:
        # Skip header row
        if row["Frequency"]=="Frequency":
            continue
        # Comment?
        if row["Topic"][0]=="#":
            continue
        if row["Topic"]=="DEFAULT":
            temp=dict((k,v) for k,v in row.iteritems() if v is not None and v!="")
            defaultrow.update(temp)
            continue
        freq=row["Frequency"]
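
For reference, a made-up illustration of what rows in the register-definition CSV read above might look like, given the field names passed to DictReader; the topics and register numbers are invented:

Topic,Register,Size,Format,Frequency,Slave,FunctionCode
DEFAULT,,1,>H,60,1,4
inverter/power,40083,1,>H,10,1,4
# comment rows (Topic starting with '#') and the header row are skipped by the loop above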
Example #26
0
    def read(self):
        for encoding in (
                lambda: ('us-ascii', None),  # fast
                lambda: (detect_encoding(self.filename), None),  # precise
                lambda: (locale.getpreferredencoding(False), None),
                lambda: (sys.getdefaultencoding(), None),  # desperate
                lambda: ('utf-8', None),  # ...
                lambda: ('utf-8', 'ignore')):  # fallback
            encoding, errors = encoding()
            # Clear the error flag for all except the last check, because
            # the error of second-to-last check is stored and shown as warning in owfile
            if errors != 'ignore':
                error = ''
            with self.open(self.filename,
                           mode='rt',
                           newline='',
                           encoding=encoding,
                           errors=errors) as file:
                # Sniff the CSV dialect (delimiter, quotes, ...)
                try:
                    dialect = csv.Sniffer().sniff(
                        # Take first couple of *complete* lines as sample
                        ''.join(file.readline() for _ in range(10)),
                        self.DELIMITERS)
                    delimiter = dialect.delimiter
                    quotechar = dialect.quotechar
                except UnicodeDecodeError as e:
                    error = e
                    continue
                except csv.Error:
                    delimiter = self.DELIMITERS[0]
                    quotechar = csv.excel.quotechar

                file.seek(0)
                try:
                    reader = csv.reader(
                        file,
                        delimiter=delimiter,
                        quotechar=quotechar,
                        skipinitialspace=True,
                    )
                    data = self.data_table(reader)

                    # TODO: Name can be set unconditionally when/if
                    # self.filename will always be a string with the file name.
                    # Currently, some tests pass StringIO instead of
                    # the file name to a reader.
                    if isinstance(self.filename, str):
                        data.name = path.splitext(
                            path.split(self.filename)[-1])[0]
                    if error and isinstance(error, UnicodeDecodeError):
                        pos, endpos = error.args[2], error.args[3]
                        warning = ('Skipped invalid byte(s) in position '
                                   '{}{}').format(pos, ('-' + str(endpos)) if
                                                  (endpos - pos) > 1 else '')
                        warnings.warn(warning)
                    self.set_table_metadata(self.filename, data)
                    return data
                except Exception as e:
                    error = e
                    continue
        raise ValueError('Cannot parse dataset {}: {}'.format(
            self.filename, error)) from error
Example #27
0
    def test_has_header(self):
        sniffer = csv.Sniffer()
        self.assertEqual(sniffer.has_header(self.sample1), False)
        self.assertEqual(sniffer.has_header(self.header + self.sample1), True)
Example #28
0
def browseFile(btn):
	global address
	global data
	global header
	global dictionary
	dictionary={}

	try:
		if(filetype.get()):
			if(filetype.get()==1):
				root.filename =  filedialog.askopenfilename()
				print(root.filename)
				csv_fileh = open(root.filename, 'r',encoding="ISO-8859-1")

				try:
				    dialect = csv.Sniffer().sniff(csv_fileh.readline())
				    csv_fileh.seek(100)
				except csv.Error:
					label1.config(text="Incompatible format...Choose your file again",relief=RIDGE)
					print(address)
				else:	
					address = root.filename
					label1.config(text="File Address is:   "+address,relief=RIDGE)

					data = pd.read_csv(root.filename,encoding="ISO-8859-1")
					data = data.replace(np.nan, 0)
					data = data.replace(np.inf, 0)


					with open(root.filename, "r",encoding="ISO-8859-1") as f:
					    reader = csv.reader(f)
					    header = next(reader)
					i=1
					for each in header:
						dictionary[i]=each
						i=i+1
 
					btn.config(state="active")



			#Excel read		
			else:
				root.filename =  filedialog.askopenfilename()
				print(root.filename)
				try:
					open_workbook(root.filename,'r')
				except XLRDError:
					label1.config(text="Incompatible format...Choose your file again",relief=RIDGE)
					print(address)
				else:
					address = root.filename
					label1.config(text="File Address is:   "+address,relief=RIDGE)
					data = pd.read_excel(root.filename, sheet_name=None)
					data = data.replace(np.nan, 0)
					data = data.replace(np.inf, 0)
					# data=data.as_matrix()	
					header =pd.read_excel(root.filename).columns.tolist()
					i=1
					for each in header:
						dictionary[i]=each
						i=i+1
					btn.config(state="active")	
	


		else:
			label1.config(text="No option choosen",relief=RIDGE)


	except:
		label1.config(text="Incompatible format...Choose your file again",relief=RIDGE)	
		btn.config(state="disabled")	
Example #29
0
def get_csv_reader(csv_file):
    dialect = csv.Sniffer().sniff(csv_file.readline())
    csv_file.seek(0)
    return csv.reader(csv_file, dialect)
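
A short usage sketch for get_csv_reader above; the file name is a placeholder:

# Hypothetical usage; 'input.csv' is a placeholder file name.
with open('input.csv', newline='') as csv_file:
    reader = get_csv_reader(csv_file)
    for row in reader:
        print(row)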
Example #30
0
    def test_has_header_regex_special_delimiter(self):
        sniffer = csv.Sniffer()
        self.assertEqual(sniffer.has_header(self.sample8), False)
        self.assertEqual(sniffer.has_header(self.header2 + self.sample8), True)