Example #1
 def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
     # Redirect output to a queue
     self.queue = StringIO.StringIO()
     print csv.list_dialects()
     self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
     self.stream = f
     self.encoding = encoding
Example #2
 def __init__(self, vid, names, infile, outfile,
              startrow=0, colkey=0, colstartvalue=1, langs='en',
              dialect='excel', delimiter=';', treevocabdelimiter='.',
              ordered=True, encoding='utf-8', description=False):
     self.vid = vid
     if isinstance(langs, basestring):
         langs = [_.strip() for _ in langs.split(',')]
     if isinstance(names, basestring):
         names = [_.strip() for _ in names.split(',')]
     self.names = OrderedDict(zip(langs, names))
     self.infile = infile
     self.outfile = outfile
     self.startrow = startrow
     self.colkey = colkey
     self.colstartvalue = colstartvalue
     self.langs = langs
     self.delimiter = delimiter
     self.treevocabdelimiter = treevocabdelimiter
     self.ordered = ordered
     self.description = description
     if dialect not in csv.list_dialects():
         raise ValueError(
             "given csv dialect '%s' is unknown. " % dialect + \
             "pick one of theses: %s" % csv.list_dialects()
         )
     self.dialect = dialect
     self.encoding = encoding
Example #3
def main(args):
    verbose = args.verbose
    annotations_file = args.annotations_file
    jiras_file = args.jira_file
    teamcity_file = args.teamcity_file
    location = args.find_missing
    if verbose:
        print csv.list_dialects()
        print os.getcwd()

    annotations = parse_tsv(annotations_file, "annotation", verbose)
    jiras = parse_tsv(jiras_file, "jira", verbose)
    teamcity_failures = parse_tsv(teamcity_file, "failure", verbose)

    if location == 'jira' or location == 'all':
        print(
            "Tests annotated as unstable or failing in TeamCity missing an issue in JIRA:"
        )
        print_diffs(jiras, annotations, teamcity_failures, verbose)

    if location == 'annotation' or location == 'all':
        print(
            "Tests with a random failure issue in JIRA or failing in TeamCity missing the unstable annotation:"
        )
        print_diffs(annotations, jiras, teamcity_failures, verbose)

    if location == 'teamcity' or location == 'all':
        print(
            "Tests annotated as unstable or with a random failure issue in JIRA but not failing in TeamCity:"
        )
        print_diffs(teamcity_failures, annotations, jiras, verbose)
Example #5
def main(args):
    verbose = args.verbose
    annotations_file = args.annotations_file
    jiras_file = args.jira_file
    teamcity_file = args.teamcity_file
    location = args.find_missing
    if verbose:
        print csv.list_dialects()
        print os.getcwd()

    annotations = parse_tsv(annotations_file, "annotation", verbose)
    jiras = parse_tsv(jiras_file, "jira", verbose)
    teamcity_failures = parse_tsv(teamcity_file, "failure", verbose)

    if location == "jira" or location == "all":
        print ("Tests annotated as unstable or failing in TeamCity missing an issue in JIRA:")
        print_diffs(jiras, annotations, teamcity_failures, verbose)

    if location == "annotation" or location == "all":
        print ("Tests with a random failure issue in JIRA or failing in TeamCity missing the unstable annotation:")
        print_diffs(annotations, jiras, teamcity_failures, verbose)

    if location == "teamcity" or location == "all":
        print ("Tests annotated as unstable or with a random failure issue in JIRA but not failing in TeamCity:")
        print_diffs(teamcity_failures, annotations, jiras, verbose)
Example #6
def main():
    """
    Command-line processor. See ``--help`` for details.
    """
    main_only_quicksetup_rootlogger()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "filenames",
        nargs="+",
        help="Names of CSV/TSV files to merge"
    )
    parser.add_argument(
        "--outfile",
        default="-",
        help="Specify an output filename. If omitted or '-', stdout is used.",
    )
    parser.add_argument(
        "--inputdialect",
        default="excel",
        help="The input files' CSV/TSV dialect. Default: %(default)s.",
        choices=csv.list_dialects(),
    )
    parser.add_argument(
        "--outputdialect",
        default="excel",
        help="The output file's CSV/TSV dialect. Default: %(default)s.",
        choices=csv.list_dialects(),
    )
    parser.add_argument(
        "--noheaders",
        action="store_true",
        help="By default, files are assumed to have column headers. "
             "Specify this option to assume no headers.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Verbose debugging output.",
    )
    progargs = parser.parse_args()

    kwargs = {
        "filenames": progargs.filenames,
        "input_dialect": progargs.inputdialect,
        "output_dialect": progargs.outputdialect,
        "debug": progargs.debug,
        "headers": not progargs.noheaders,
    }
    if progargs.outfile == '-':
        log.info("Writing to stdout")
        merge_csv(outfile=sys.stdout, **kwargs)
    else:
        log.info("Writing to " + repr(progargs.outfile))
        with open(progargs.outfile, 'w') as outfile:
            # noinspection PyTypeChecker
            merge_csv(outfile=outfile, **kwargs)
Example #8
    def __init__(self):
        """
        Init the argparse parser.
        """
        self.parser = argparse.ArgumentParser(
            add_help=False,
            description=(
                'evaluate the cognates clustering of a dataset '
                'against the same data\'s gold-standard cognate classes'))

        self.parser.add_argument(
            'dataset_true',
            help=
            ('path to the dataset containing the gold-standard cognate classes'
             ))
        self.parser.add_argument(
            'dataset_pred',
            help=(
                'path to the dataset containing the predicted cognate classes'
            ))

        csv_args = self.parser.add_argument_group('optional arguments')
        csv_args.add_argument(
            '--type-true',
            choices=["csv", "cldf"],
            default="csv",
            help=('Is the reference dataset a CSV or a CLDF dataset?'))
        csv_args.add_argument(
            '--type-pred',
            choices=["csv", "cldf"],
            default="csv",
            help=('Is the prediction dataset a CSV or a CLDF dataset?'))

        csv_args.add_argument(
            '--dialect-true',
            choices=csv.list_dialects(),
            help=('the csv dialect to use for reading the dataset '
                  'that contains the gold-standard cognate classes; '
                  'the default is to look at the file extension '
                  'and use excel for .csv and excel-tab for .tsv'))
        csv_args.add_argument(
            '--dialect-pred',
            choices=csv.list_dialects(),
            help=('the csv dialect to use for reading the dataset '
                  'that contains the predicted cognate classes; '
                  'the default is to look at the file extension '
                  'and use excel for .csv and excel-tab for .tsv'))

        other_args = self.parser.add_argument_group(
            'optional arguments - other')
        other_args.add_argument('-h',
                                '--help',
                                action='help',
                                help=('show this help message and exit'))
Example #9
 def test_registry(self):
     class myexceltsv(csv.excel):
         delimiter = "\t"
     name = "myexceltsv"
     expected_dialects = csv.list_dialects() + [name]
     expected_dialects.sort()
     csv.register_dialect(name, myexceltsv)
     self.addCleanup(csv.unregister_dialect, name)
     self.assertEqual(csv.get_dialect(name).delimiter, '\t')
     got_dialects = sorted(csv.list_dialects())
     self.assertEqual(expected_dialects, got_dialects)
Example #10
def option_parser(get_parser, args):
    parser = get_parser(
        _(
            '''\
%prog list_categories [options]

Produce a report of the category information in the database. The
information is the equivalent of what is shown in the Tag browser.
'''
        )
    )

    parser.add_option(
        '-i',
        '--item_count',
        default=False,
        action='store_true',
        help=_(
            'Output only the number of items in a category instead of the '
            'counts per item within the category'
        )
    )
    parser.add_option(
        '-c', '--csv', default=False, action='store_true', help=_('Output in CSV')
    )
    parser.add_option(
        '--dialect',
        default='excel',
        choices=csv.list_dialects(),
        help=_('The type of CSV file to produce. Choices: {}')
        .format(', '.join(sorted(csv.list_dialects())))
    )
    parser.add_option(
        '-r',
        '--categories',
        default='',
        dest='report',
        help=_("Comma-separated list of category lookup names. "
               "Default: all")
    )
    parser.add_option(
        '-w',
        '--width',
        default=-1,
        type=int,
        help=_(
            'The maximum width of a single line in the output. '
            'Defaults to detecting screen size.'
        )
    )
    return parser
Example #11
def option_parser(get_parser, args):
    parser = get_parser(
        _(
            '''\
%prog list_categories [options]

Produce a report of the category information in the database. The
information is the equivalent of what is shown in the tags pane.
'''
        )
    )

    parser.add_option(
        '-i',
        '--item_count',
        default=False,
        action='store_true',
        help=_(
            'Output only the number of items in a category instead of the '
            'counts per item within the category'
        )
    )
    parser.add_option(
        '-c', '--csv', default=False, action='store_true', help=_('Output in CSV')
    )
    parser.add_option(
        '--dialect',
        default='excel',
        choices=csv.list_dialects(),
        help=_('The type of CSV file to produce. Choices: {}')
        .format(', '.join(sorted(csv.list_dialects())))
    )
    parser.add_option(
        '-r',
        '--categories',
        default='',
        dest='report',
        help=_("Comma-separated list of category lookup names. "
               "Default: all")
    )
    parser.add_option(
        '-w',
        '--width',
        default=-1,
        type=int,
        help=_(
            'The maximum width of a single line in the output. '
            'Defaults to detecting screen size.'
        )
    )
    return parser
Example #13
 def test_registry(self):
     class myexceltsv(csv.excel):
         delimiter = "\t"
     name = "myexceltsv"
     expected_dialects = csv.list_dialects() + [name]
     expected_dialects.sort()
     csv.register_dialect(name, myexceltsv)
     try:
         self.failUnless(isinstance(csv.get_dialect(name), myexceltsv))
         got_dialects = csv.list_dialects()
         got_dialects.sort()
         self.assertEqual(expected_dialects, got_dialects)
     finally:
         csv.unregister_dialect(name)
Example #14
def main():
    parser = argparse.ArgumentParser(
        description='Concatenate comma separated value files.',
        add_help=False,
    )
    parser.add_argument(
        '--help', '-h',
        action=HelpAction,
        nargs=0,
        help='show this help message and exit',
    )
    parser.add_argument(
        '--skip-headers',
        help=('treat the first line of each file as a header, '
              'and only include one copy in the output.'),
        action='store_true',
        default=False,
    )
    parser.add_argument(
        '--columns', '--col', '-c',
        help=("limit the output to the specified columns."
              "Columns are identified by number, starting with 0."),
        default=[],
        action='append',
    )
    parser.add_argument(
        '--dialect', '-d',
        help=('specify the output dialect name. '
              'Defaults to %(default)s.'),
        default='excel',
        choices=csv.list_dialects(),
    )
    parser.add_argument(
        'filename',
        nargs='+',
        help='files to process',
    )
    args = parser.parse_args()

    columns = _get_column_nums_from_args(args.columns)
    writer = csv.writer(sys.stdout, dialect=args.dialect)
    headers_written = False

    for filename in args.filename:
        with open(filename, 'r') as f:
            reader = csv.reader(f)
            if args.skip_headers:
                if not headers_written:
                    # This row must include the headers for the output
                    headers = reader.next()
                    writer.writerow(_get_printable_columns(columns, headers))
                    headers_written = True
                else:
                    # We have seen headers before, and are skipping,
                    # so do not write the first row of this file.
                    reader.next()

            # Process the rest of the file
            for row in reader:
                writer.writerow(_get_printable_columns(columns, row))
Example #15
def register_csv_parse_command(add_parser):
    parser = add_parser(
        'csv_parse',
        help='Create RDF graph out of manual tagging done as CSV.',
        description='Create RDF graph out of manual tagging done as CSV. '
        'The first row should be column labels, with "Dataset" '
        'denoting the column with dataset URIs, and either '
        'a "Concepts" column with a comma-separated list of concepts '
        '(either full or just the identifying part of the concept URI), '
        'or one column per concept, where any non-empty cell '
        'indicates an association. If you use the csv_prepare '
        'command, you get a properly formatted CSV to fill in.',
    )
    parser.add_argument(
        '--check',
        help='Check the CSV for unrecognized concepts, then exit. This way, '
        'all unrecognized concepts are reported at once.',
        action='store_true',
    )
    parser.add_argument(
        '--dialect',
        '-d',
        help='The dialect to use when parsing the CSV file. '
        '(Default: %(default)s)',
        choices=csv.list_dialects(),
        default='excel',
    )
    parser.add_argument(
        'csv_file',
        help='Path to CSV file to convert to RDF.',
    )
    register_arguments_for_rdf_output(parser)
    parser.set_defaults(func=do_csv_parse_command)
Example #16
 def add_arguments(cls, parser):
     parser.add_argument(
         "--dialect", metavar="DIALECT", choices=csv.list_dialects(), default="excel",
         help="format CSV file according to DIALECT")
     parser.add_argument(
         "csv_file", metavar="CSV-FILE", type=argparse.FileType("w"),
         help="write data to CSV-FILE")
Example #17
def createConfig(args):
    '''Interactively creates a configuration file'''

    name = raw_input('your name: ')
    iban = raw_input('your IBAN: ')
    bic = raw_input('your BIC: ')
    creditorId = raw_input('your creditor id: ')
    csvDialect = raw_input('CSV dialect [%s]: ' %
                           ' '.join(sorted(csv.list_dialects())))

    # we use a PySepaDD-compatible configuration dict for simplicity
    config = {
        'name': name,
        'IBAN': iban,
        'BIC': bic,
        'creditor_id': creditorId,
        'currency': DEFAULT_CURRENCY,
        'batch': DEFAULT_BATCH,
        'csv_dialect': csvDialect,
    }

    with open(args.config, 'wb') as f:
        pprint.pprint(config, stream=f, indent=4)

    print 'Configuration written to file %s. ' \
          'You can edit this file with a text ' \
          'editor if you need to change something later.' % args.config
Example #18
def createConfig(args):
    '''Interactively creates a configuration file'''

    name = raw_input('your name: ')
    iban = raw_input('your IBAN: ')
    bic = raw_input('your BIC: ')
    creditorId = raw_input('your creditor id: ')
    csvDialect = raw_input('CSV dialect [%s]: ' % ' '.join(sorted(csv.list_dialects())))

    # we use a PySepaDD-compatible configuration dict for simplicity
    config = {
            'name': name,
            'IBAN': iban,
            'BIC':  bic,
            'creditor_id': creditorId,
            'currency': DEFAULT_CURRENCY,
            'batch': DEFAULT_BATCH,
            'csv_dialect': csvDialect,
    }

    with open(args.config, 'wb') as f:
        pprint.pprint(config, stream=f, indent=4)

    print 'Configuration written to file %s. ' \
          'You can edit this file with a text ' \
          'editor if you need to change something later.' % args.config
Example #19
def main():
    print('Starting CSV Example')
    print(csv.list_dialects())

    print('Creating CSV file')
    with open('sample.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['She Loves You', 'Sept 1963'])
        writer.writerow(['I Want to Hold Your Hand', 'Dec 1963'])
        writer.writerow(['Cant Buy Me Love', 'Apr 1964'])
        writer.writerow(['A Hard Days Night', 'July 1964'])

    print('-' * 100)

    print('Starting to read csv file')
    with open('sample.csv', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Process each row in the csv file
        for row in reader:
            row_length = len(row)
            print('row_length', row_length)
            for i in range(row_length):
                # Each element in the row can be accessed via an index
                print(row[i], end=', ')
            print()
    print('Done Reading')
Example #20
    def __init__(self, *args, **kwds):

        self.data = kwds.pop('data')

        kwds["style"] = \
            wx.DEFAULT_DIALOG_STYLE | wx.RESIZE_BORDER | wx.THICK_FRAME

        wx.Dialog.__init__(self, *args, **kwds)

        self.csvwidgets = CsvParameterWidgets(self, None)
        dialect = csv.get_dialect(csv.list_dialects()[0])
        self.has_header = False

        style = wx.TE_MULTILINE | wx.TE_READONLY | wx.HSCROLL
        self.preview_textctrl = CSVPreviewTextCtrl(self, -1, style=style)

        self.button_cancel = wx.Button(self, wx.ID_CANCEL, "")
        self.button_apply = wx.Button(self, wx.ID_APPLY, "")
        self.button_ok = wx.Button(self, wx.ID_OK, "")

        self._set_properties()
        self._do_layout()

        self.preview_textctrl.fill(data=self.data, dialect=dialect)

        self.Bind(wx.EVT_BUTTON, self.OnButtonApply, self.button_apply)
Example #21
def main():
    parser = argparse.ArgumentParser(
        description='Concatenate comma separated value files.',
        add_help=False,
    )
    parser.add_argument(
        '--help', '-h',
        action=HelpAction,
        nargs=0,
        help='show this help message and exit',
    )
    parser.add_argument(
        '--skip-headers',
        help=('treat the first line of each file as a header, '
              'and only include one copy in the output.'),
        action='store_true',
        default=False,
    )
    parser.add_argument(
        '--columns', '--col', '-c',
        help=("limit the output to the specified columns."
              "Columns are identified by number, starting with 0."),
        default=[],
        action='append',
    )
    parser.add_argument(
        '--dialect', '-d',
        help=('specify the output dialect name. '
              'Defaults to %(default)s.'),
        default='excel',
        choices=csv.list_dialects(),
    )
    parser.add_argument(
        'filename',
        nargs='+',
        help='files to process',
    )
    args = parser.parse_args()

    columns = _get_column_nums_from_args(args.columns)
    writer = csv.writer(sys.stdout, dialect=args.dialect)
    headers_written = False

    for filename in args.filename:
        with open(filename, 'r') as f:
            reader = csv.reader(f)
            if args.skip_headers:
                if not headers_written:
                    # This row must include the headers for the output
                    headers = next(reader)
                    writer.writerow(_get_printable_columns(columns, headers))
                    headers_written = True
                else:
                    # We have seen headers before, and are skipping,
                    # so do not write the first row of this file.
                    next(reader)

            # Process the rest of the file
            for row in reader:
                writer.writerow(_get_printable_columns(columns, row))
Example #22
def parse_args():
    parser = argparse.ArgumentParser(
        description='Converts a CSV file to an OSM file')
    parser.add_argument('csv_file', help='CSV file to read')
    parser.add_argument('output_file', help='Output file name')
    parser.add_argument(
        '--csv-dialect',
        default='excel',
        help=
        'The csv dialect, i.e. the algorithm used to interpret the textual data. '
        'Can be one of: ' + ', '.join(csv.list_dialects()))
    parser.add_argument(
        '--csv-encoding',
        default='utf-8',
        help='Character encoding of the CSV file. Examples: utf-8, latin1')
    parser.add_argument('--lon',
                        dest='longitude_field',
                        default='longitude',
                        help='Name of the field that contains the longitude')
    parser.add_argument('--lat',
                        dest='latitude_field',
                        default='latitude',
                        help='Name of the field that contains the latitude')
    parser.add_argument(
        '--translator',
        help='Python file to import that '
        'contains special translation methods to transform the tags.')
    parser.add_argument('-f',
                        '--force',
                        dest='force_overwrite',
                        action='store_true',
                        help='Force overwriting the destination file.')
    return parser.parse_args()
Example #23
def main():
    file = input('Enter the name of the input file: ')
    in_dialect = input('and its dialect: ')
    out_file = input('Enter the name of the output file: ')
    out_dialect = input('and its dialect: ')

    all_dialects = csv.list_dialects()


    if in_dialect not in all_dialects or out_dialect not in all_dialects:
        print('\nThe given dialect is wrong.')
        return

    try:
        f = open(file, newline='')
        reader = csv.reader(f, dialect=in_dialect)
    except IOError:
        print('\nThere was an error in handling the file.')
        return

    with open(out_file, 'w', newline='') as f:
        try:
            writer = csv.writer(f, dialect=out_dialect)
            for row in reader:
                writer.writerow(row)
        except IOError:
            print('\nThere was an error in handling the file.')
            return

    print(f'\nFile {file} has been converted into {out_dialect}.')
Example #24
 def write_directory(self,
                     tic_dat,
                     dir_path,
                     allow_overwrite=False,
                     dialect='excel',
                     write_header=True):
     """
     write the ticDat data to a collection of csv files
     :param tic_dat: the data object
     :param dir_path: the directory in which to write the csv files
     :param allow_overwrite: boolean - are we allowed to overwrite existing
                             files?
     :param dialect: the csv dialect. Consult csv documentation for details.
     :param write_header: Boolean. Should the header information be written
                          as the first row?
     :return:
     """
     verify(csv, "csv needs to be installed to use this subroutine")
     verify(dialect in csv.list_dialects(), "Invalid dialect %s" % dialect)
     verify(not os.path.isfile(dir_path),
            "A file is not a valid directory path")
     if self.tic_dat_factory.generic_tables:
         dat, tdf = create_generic_free(tic_dat, self.tic_dat_factory)
         return tdf.csv.write_directory(dat, dir_path, allow_overwrite,
                                        dialect, write_header)
     tdf = self.tic_dat_factory
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(
             tic_dat, lambda m: msg.append(m)):
         raise TicDatError("Not a valid TicDat object for this schema : " +
                           " : ".join(msg))
     if not allow_overwrite:
         for t in tdf.all_tables:
             f = os.path.join(dir_path, t + ".csv")
             verify(not os.path.exists(f),
                    "The %s path exists and overwrite is not allowed" % f)
     if not os.path.isdir(dir_path):
         os.mkdir(dir_path)
     for t in tdf.all_tables:
         f = os.path.join(dir_path, t + ".csv")
         with open(f, 'w') as csvfile:
             writer = csv.DictWriter(
                 csvfile,
                 dialect=dialect,
                 fieldnames=tdf.primary_key_fields.get(t, ()) +
                 tdf.data_fields.get(t, ()))
             writer.writeheader() if write_header else None
             _t = getattr(tic_dat, t)
             if dictish(_t):
                 for p_key, data_row in _t.items():
                     primaryKeyDict = {
                         f: v
                         for f, v in zip(
                             tdf.primary_key_fields[t],
                             p_key if containerish(p_key) else (p_key, ))
                     }
                     writer.writerow(dict(data_row, **primaryKeyDict))
             else:
                 for data_row in (_t if containerish(_t) else _t()):
                     writer.writerow(dict(data_row))
Example #25
 def to_numpy_array(self, file, filename, dialect=None, has_header=False):
     if dialect is None:
         dialect, has_header = self.snif_csv_dialect(file)
     else:
         assert dialect in csv.list_dialects()
     reader = csv.reader(file, dialect)
     if has_header:
         reader.next()
     series = []
     # TODO: check granularity if we have a date column
     prefs = self._cw.user.format_preferences[0]
     dec_sep = prefs.decimal_separator
     th_sep = prefs.thousands_separator or ''
     for line, values in enumerate(reader):
         if len(values) not in (1, 2):
             raise ValueError('Too many columns in %s' % filename)
         try:
             strval = values[-1].replace(th_sep, '').replace(dec_sep, '.')
             val = float(strval)
         except ValueError:
             if line == 0 and not has_header:
                 self.debug('error while parsing first line of %s',
                            filename)
                 continue  # assume there was a header
             else:
                 raise ValueError(
                     'Invalid data type for value %s on line %s of %s' %
                     (values[-1], reader.line_num, filename))
         series.append(val)
     return numpy.array(series, dtype=self.entity.dtype)
Example #26
def get_dialects_list():
    """ Return a tuple with all the available dialect names registered. """
    registered = tuple(i['name'] for i in csv_dialects)
    # remove duplicated, since the ones we register through the manager
    # could (should) be set in csv module too
    unique = set([*registered, *csv.list_dialects()])
    return tuple(unique)
Example #27
def _read_config():
    """Reads the general config file and returns the resulting config object.

    Other modules can get the config object by accessing the
    utils.config variable.

    """
    config = {'camera-dev': '0',
              'save-filename-pattern': 'exam-{student-id}-{seq-number}.png',
              'csv-dialect': 'tabs',
              'default-charset': 'utf8', # special value: 'system-default'
              }
    parser = ConfigParser.SafeConfigParser()
    parser.read([os.path.expanduser('~/.eyegrade.cfg'),
                 os.path.expanduser('~/.camgrade.cfg')])
    if 'default' in parser.sections():
        for option in parser.options('default'):
            config[option] = parser.get('default', option)
    if not config['csv-dialect'] in csv.list_dialects():
        config['csv-dialect'] = 'tabs'
    if 'error-logging' in config and config['error-logging'] == 'yes':
        config['error-logging'] = True
    else:
        config['error-logging'] = False
    config['camera-dev'] = int(config['camera-dev'])
    if config['default-charset'] == 'system-default':
        config['default-charset'] = locale.getpreferredencoding()
    return config
Example #29
    def __init__(self, path, dialect=None, transform=None):
        """
        Set the instance's props. Raise a DatasetError if the given file path
        does not exist. 

        The dialect arg should be either a string identifying one of the csv
        dialects or None, in which case the dialect is inferred based on the
        file extension. Raise a ValueError if the given dialect is specified
        but unrecognised.

        If is_ipa is set, assume that the transcriptions are in IPA and convert
        them into some other sound class model.
        """
        if not Path(path).exists():
            raise DatasetError('Could not find file: {}'.format(path))

        if dialect is None:
            dialect = 'excel-tab' if path.endswith('.tsv') else 'excel'
        elif dialect not in csv.list_dialects():
            raise ValueError('Unrecognised csv dialect: {!s}'.format(dialect))

        self.path = path
        self.dialect = dialect
        self.transform = transform

        self.alphabet = None
Example #30
 def find_duplicates(self, dir_path, dialect='excel', headers_present = True):
     """
     Find the row counts for duplicated rows.
     :param dir_path: the directory containing .csv files.
     :param dialect: the csv dialect. Consult csv documentation for details.
     :param headers_present: Boolean. Does the first row of data contain
                             the column headers?
     :return: A dictionary whose keys are the table names for the primary key tables.
              Each value of the return dictionary is itself a dictionary.
              The inner dictionary is keyed by the primary key values encountered
              in the table, and the value is the count of records in the
               csv file with this primary key.
              Row counts smaller than 2 are pruned off, as they aren't duplicates
     caveats: Missing files resolve to an empty table, but missing fields (data or primary key) on
              matching files throw an Exception.
     """
     verify(csv, "csv needs to be installed to use this subroutine")
     verify(dialect in csv.list_dialects(), "Invalid dialect %s"%dialect)
     verify(os.path.isdir(dir_path), "Invalid directory path %s"%dir_path)
     tdf = self.tic_dat_factory
     rtn = {t:defaultdict(int) for t,_ in tdf.primary_key_fields.items()
            if _ and self._get_file_path(dir_path, t)}
     for t in rtn:
         with open(self._get_file_path(dir_path, t)) as csvfile:
             for r in self._get_data(csvfile, t, dialect, headers_present):
                 p_key = r[tdf.primary_key_fields[t][0]] \
                         if len(tdf.primary_key_fields[t])==1 else \
                         tuple(r[_] for _ in tdf.primary_key_fields[t])
                 rtn[t][p_key] += 1
     for t in list(rtn.keys()):
         rtn[t] = {k:v for k,v in rtn[t].items() if v > 1}
         if not rtn[t]:
             del(rtn[t])
     return rtn
Example #31
def initialize_dialect(dialect, **kwargs):
    # Add Unix dialect from Python 3
    class unix_dialect(csv.Dialect):
        """Describe the usual properties of Unix-generated CSV files."""
        delimiter = ','
        quotechar = '"'
        doublequote = True
        skipinitialspace = False
        lineterminator = '\n'
        quoting = csv.QUOTE_ALL

    csv.register_dialect("unix", unix_dialect)

    if dialect not in csv.list_dialects():
        raise DialectNotAvailableError(
            "Dialect '%s' is not supported by your version of python." %
            dialect)

    # Create a dictionary from only set options
    dialect_params = dict((k, v) for k, v in kwargs.items() if v is not None)
    if dialect_params:
        try:
            csv.register_dialect('custom', dialect, **dialect_params)
        except TypeError as e:
            raise CustomDialectFailureError(
                "Unable to create custom dialect: %s" % to_native(e))
        dialect = 'custom'

    return dialect
Example #32
 def _create_tic_dat(self, dir_path, dialect, headers_present):
     verify(dialect in csv.list_dialects(), "Invalid dialect %s" % dialect)
     verify(os.path.isdir(dir_path), "Invalid directory path %s" % dir_path)
     rtn = {
         t: self._create_table(dir_path, t, dialect, headers_present)
         for t in self.tic_dat_factory.all_tables
     }
     return {k: v for k, v in rtn.items() if v}
Example #33
def _initCsvDATDialect():
    if 'DAT' not in csv.list_dialects():
        csv.register_dialect('DAT',
                             delimiter='\t',
                             doublequote=False,
                             escapechar=None,
                             lineterminator='\n',
                             quoting=csv.QUOTE_NONE)
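Once registered, the 'DAT' name can be passed anywhere the csv module accepts a dialect. A minimal usage sketch (the input file name is hypothetical):

import csv

_initCsvDATDialect()
with open('export.dat', newline='') as f:  # hypothetical tab-delimited input
    for row in csv.reader(f, dialect='DAT'):
        print(row)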
Example #34
 def showVerboseHelp(self):
     commandlineapp.CommandLineApp.showVerboseHelp(self)
     print
     print 'OUTPUT DIALECTS:'
     print
     for name in csv.list_dialects():
         print '\t%s' % name
     print
     return
Example #35
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-D', '--dialect', choices=csv.list_dialects())
    parser.add_argument('-tD', '--to-dialect', default='excel-tab', choices=csv.list_dialects())
    parser.add_argument('INPUT', type=argparse.FileType())

    args = parser.parse_args()

    if not args.dialect:
        sniffer = csv.Sniffer()
        args.dialect = sniffer.sniff(args.INPUT.read(1024))
        args.INPUT.seek(0)

    with args.INPUT as f:
        reader = csv.reader(f, dialect=args.dialect)
        writer = csv.writer(sys.stdout, dialect=args.to_dialect)

        writer.writerows(reader)
Example #36
def get_power_params(qc_path, file_):

    import csv

    subj_dir = os.path.dirname(qc_path)

    subj_dir = os.path.join(subj_dir, 'power_params')

    scans = os.listdir(subj_dir)

    meanFD = None
    meanDVARS = None

    for scan in scans:

        if scan in file_:

            subj_dir = os.path.join(subj_dir, scan)

            threshold = '_threshold_'

            if 'SCRUB_' in file_:
                threshold += (file_.split('SCRUB_')[1]).split('_')[0]
                subj_dir = os.path.join(subj_dir, threshold)
            else:

                subj_dir = os.path.join(subj_dir, os.listdir(subj_dir)[0])


            params_file = os.path.join(subj_dir, os.listdir(subj_dir)[0])
            csv_file = csv.DictReader(open(params_file, 'rb'), delimiter=',')
            print 'params_file: ', params_file
            print csv.list_dialects()

            line = None

            for line in csv_file:

                meanFD = line['MeanFD']
                meanDVARS = line['MeanDVARS']
                print meanFD, meanDVARS
                
                return meanFD, meanDVARS
Example #37
 def _create_tic_dat(self, dir_path, dialect, headers_present, encoding):
     verify(dialect in csv.list_dialects(), "Invalid dialect %s"%dialect)
     verify(os.path.isdir(dir_path), "Invalid directory path %s"%dir_path)
     rtn =  {t : self._create_table(dir_path, t, dialect, headers_present, encoding)
             for t in self.tic_dat_factory.all_tables}
     missing_tables = {t for t in self.tic_dat_factory.all_tables if not rtn[t]}
     if missing_tables:
         print ("The following table names could not be found in the %s directory.\n%s\n"%
                (dir_path,"\n".join(missing_tables)))
     return {k:v for k,v in rtn.items() if v}
Example #38
def examine_excel_dialect():
    '''
    There are different CSV dialects across applications because there is no single CSV standard. A dialect is simply a grouping of certain format
    parameters. Examples of format parameters include the delimiter (defaults to ',') and how double quotes (") that appear inside data fields should
    be handled.
    - The only built-in dialects on my machine appear to be 'excel-tab' and 'excel'
    '''
    print(csv.list_dialects())  # ['excel-tab', 'excel']
    print(dir(csv.excel))  # delimiter, doublequote, etc.
    print(csv.excel.delimiter)  # ','
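Newer Python 3 releases ship a third built-in dialect, 'unix' (added in 3.2), so the list above may be longer. A minimal sketch, using only the standard library, of how a registered dialect joins the names reported by csv.list_dialects() (the 'semicolons' dialect is hypothetical):

import csv

class semicolons(csv.excel):  # hypothetical dialect for ';'-separated files
    delimiter = ';'

csv.register_dialect('semicolons', semicolons)
assert 'semicolons' in csv.list_dialects()
csv.unregister_dialect('semicolons')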
Example #40
    def getTitles(self):
        """
        Show the title of each column. (REDO LATER)
        """
        with open(self.csv, 'r') as csvfile:
            l = csv.list_dialects()

        for title in l:
            print(title)

        return None
Example #41
def test_manager_dialects():
    assert manager.get_dialects() == ()
    manager.register_dialect('test', delimiter='|')

    assert 'test' in csv.list_dialects()

    # duplicates not allowed by name
    with pytest.raises(ValueError):
        manager.register_dialect('test', delimiter=',')

    assert manager.get_dialects() == (('test', {'delimiter': '|'}), )

    registered = manager.get_dialects_list()
    csv_registered = csv.list_dialects()
    assert len(registered) == len(csv_registered)
    for d in csv_registered:
        assert d in registered

    manager.reset()
    assert manager.get_dialects() == ()
Example #42
def main():
    try:
        import setproctitle
        setproctitle.setproctitle(name)
    except ImportError:
        pass

    dialects = csv.list_dialects()
    dialects.sort()
    dialects.insert(0, 'sniff')

    # CLI arguments
    parser = argparse.ArgumentParser(prog=name, description=description)

    parser.add_argument('-V', '--version',  action='version', version="%(prog)s " + version)
    parser.add_argument('-C', '--config',   help='Use a different configuration file')
    parser.add_argument('-s', '--section',  help='Configuration file section', default='DEFAULT')
    parser.add_argument('-c', '--channel',  help='Send to this channel or @username')
    parser.add_argument('-U', '--url',      help='Mattermost webhook URL')
    parser.add_argument('-u', '--username', help='Username')
    parser.add_argument('-i', '--icon',     help='Icon')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-t', '--tabular', metavar='DIALECT', const='sniff',
                       nargs='?', choices=dialects,
                       help='Parse input as CSV and format it as a table (DIALECT can be one of %(choices)s)')
    group.add_argument('-y', '--syntax', default='auto')

    parser.add_argument('-I', '--info', action='store_true',
                        help='Include file information in message')
    parser.add_argument('-n', '--dry-run', '--just-print', action='store_true',
                        help="Don't send, just print the payload")
    parser.add_argument('-f', '--file', default='-',
                        help="Read content from FILE. If - reads from standard input (DEFAULT: %(default)s)")

    args = parser.parse_args()

    if args.file == '-':
        message = sys.stdin.read()
        filename = None
    else:
        message = ''
        filename = args.file

    try:
        payload = send(args.channel, message, filename, args.url,
                       args.username, args.icon, args.syntax, args.tabular,
                       args.info, args.dry_run, args.section, name,
                       args.config)
    except (configparser.Error, TypeError, RuntimeError) as e:
        sys.exit(str(e))

    if args.dry_run:
        print(payload)
Example #43
 def dictlist2csv(self, file, dictlist):
     if 'unix' not in csv.list_dialects():
         csv.register_dialect('unix', lineterminator='\n')
     with open(file, 'wb') as csvfile:
         fieldsnames = dictlist[0].keys()
         writer = csv.DictWriter(csvfile, fieldsnames, dialect='unix')
         writer.writeheader()
         for row in dictlist:
             try:
                 writer.writerow(row)
             except:
                 print(row, dictlist.index(row))
                 raise
Example #44
def guess_dialect(filename):
	"""tries to guess the dialect of csv files"""
	best = ''
	max_columns = 0
	for dialect in csv.list_dialects():
		file = open(filename, 'r')
		rd = ucsv.reader(file, dialect=dialect)
		header = rd.next()
		if len(header) > max_columns:
			max_columns = len(header)
			best = dialect
		file.close()
	return best
Example #45
def downloadcsv(request):
    """Create a CSV file and download it.
    Avoid creating a file in memory.
    """
    logging.info("dialects: %s" % csv.list_dialects())
    # TODO: what encoding? utf-8 seems likely
    fname = "rostrum-%s" % datetime.datetime.now().isoformat()[:19]
    response = HttpResponse(mimetype="text/csv; charset=utf-8")
    response["Content-Disposition"] = 'attachment; filename="%s"' % fname
    writer = csv.writer(response)
    writer.writerow(MOGRIFIELDS.keys())  # first row is original headers and order
    apps = App.objects.all()
    for app in apps:
        row = [getattr(app, v, u"").encode("utf-8") for v in MOGRIFIELDS.values()]  # put fields in original order
        writer.writerow(row)
    return response
Example #46
def main():
    parser = argparse.ArgumentParser(
        description=u"a tool to approximate a distribution stored in a CSV file"
    )
    parser.add_argument("--range",
        type=range_arg,
        help=u"the range to process, e.g. A3:A24",
        metavar="RANGE",
        required=True
    )
    parser.add_argument("--dialect",
        help=u"the dialect of the CSV input file, default is 'excel'",
        choices=csv.list_dialects(),
        default="excel",
        required=False
    )
    parser.add_argument("--delimiter",
        help=u"field delimiter of the CSV input file, default is ','",
        default=",",
        required=False
    )
    parser.add_argument("--quote-char",
        help=u"the quotation character of the CSV input file, default is '\"'",
        default='"',
        required=False
    )
    parser.add_argument("--n-null-max",
        help=u"the amount of interations the approximation algorithm runs",
        type=int,
        default=15,
        required=False,
        metavar="N"
    )
    parser.add_argument("file",
        type=input_file_arg,
        help=u"the CSV file to process",
        metavar="FILE"
    )
    args = parser.parse_args(sys.argv[1:])
    csv_reader = unicode_csv_reader(args.file,
        dialect=args.dialect,
        delimiter=args.delimiter,
        quotechar=args.quote_char
    )
    values = read_csv_values(csv_reader, args.range)
    values = scale_values(sorted(values, reverse=True))
    analyze(values, args.n_null_max)
Example #47
    def _get_child_types(self):
        _result = {}
        _result["datatypes"] = {"type": "string", "pattern": "(integer|string|string(\(.*\))|serial|timestamp)"}
        _result["db_types"] = {"type": "string", "enum": db_types()}
        _result["and_or"] = {"type": "string", "enum": and_or()}
        _result["in_types"] = {"type": "string", "enum": in_types()}
        _result["index_types"] = {"type": "string", "enum": index_types()}
        _result["constraint_types"] = {"type": "string", "enum": constraint_types()}
        _result["set_operator"] = {"type": "string", "enum": set_operator()}
        _result["quoting"] = {"type": "string", "enum": quoting_types()}

        _result["csv_dialects"] = {"type": "string", "enum": list_dialects()}
        _result["join_types"] = {"type": "string", "enum": join_types()}

        _result["xpath_data_format"] = {"type": "string", "enum": xpath_data_formats()}

        def make_one_of(_classes):
            return {"type": "object",
                    "anyOf":
                         [{"properties": {x: {"$ref": "#/definitions/" + x}}} for x in _classes]
                    }

        _result["statement"] = make_one_of(verbs())
        _result["condition_part"] = make_one_of(condition_part())
        _result["TabularExpressionItem"] = make_one_of(tabular_expression_item_types())
        _result["data_source_types"] = make_one_of(data_source_types())

        _result["ArrayString"] = {"type": "array", "items": {"type": "string"}}
        _result["ArrayList"] = {"type": "array"}

        _result["ArrayParameterString"] = self._child_array_of(['#/definitions/ParameterString'])
        _result["ArrayParameterConstraint"] = self._child_array_of(['#/definitions/ParameterConstraint'])
        _result["ArrayParameterColumndefinition"] = self._child_array_of(['#/definitions/ParameterColumndefinition'])
        _result["ArrayParameterSource"] = self._child_array_of(['#/definitions/ParameterSource'])
        _result["ArrayParameterWhen"] = self._child_array_of(['#/definitions/ParameterWhen'])
        _result["ArrayParameterIdentifier"] = self._child_array_of(['#/definitions/ParameterIdentifier'])
        _result["ArrayStatement"] = self._child_array_of(['#/definitions/statement'])
        _result["ArrayParameterOrderByItem"] = self._child_array_of(['#/definitions/ParameterOrderByItem'])
        _result["ArrayParameterCondition"] = self._child_array_of(['#/definitions/ParameterCondition'])
        _result["ArrayParameterField"] = self._child_array_of(['#/definitions/ParameterField'])
        _result["ArrayParameterAssignment"] = self._child_array_of(['#/definitions/ParameterAssignment'])
        _result["ArrayExpressionItem"] = self._child_array_of(make_one_of(expression_item_types()))
        _result["ArrayTabularExpressionItem"] = self._child_array_of(make_one_of(tabular_expression_item_types()))


        return _result
Example #48
def parse_args():
    parser = argparse.ArgumentParser(description='Converts a CSV file to an OSM file')
    parser.add_argument('csv_file', help='CSV file to read')
    parser.add_argument('output_file', help='Output file name')
    parser.add_argument('--csv-dialect', default='excel',
        help='The csv dialect, i.e. the algorithm used to interpret the textual data. '
             'Can be one of: ' + ', '.join(csv.list_dialects()))
    parser.add_argument('--csv-encoding', default='utf-8',
            help='Character encoding of the CSV file. Examples: utf-8, latin1')
    parser.add_argument('--lon', dest='longitude_field', default='longitude',
        help='Name of the field that contains the longitude')
    parser.add_argument('--lat', dest='latitude_field', default='latitude',
        help='Name of the field that contains the latitude')
    parser.add_argument('--translator', help='Python file to import that '
        'contains special translation methods to transform the tags.')
    parser.add_argument('-f', '--force', dest='force_overwrite', action='store_true',
        help='Force overwriting the destination file.')
    return parser.parse_args()
Example #49
    def __init__(self, parent, csvfilepath):
        self.parent = parent
        self.csvfilepath = csvfilepath

        if csvfilepath is None:
            dialect = csv.get_dialect(csv.list_dialects()[0])
            self.has_header = False
        else:
            dialect, self.has_header = sniff(self.csvfilepath)

        self.param_labels = []
        self.param_widgets = []

        self._setup_param_widgets()
        self._do_layout()
        self._update_settings(dialect)

        self.choice_dialects.SetSelection(0)
Example #50
def saveDataCSV(self, dialect, outfilename):
    """
        This function writes the previously read csv data into a chosen file with a certain dialect.

        :param dialect: One of the dialects provided by python or saved in python.
        :param outfilename: the path and name of the wanted output file
        """
    if dialect in csv.list_dialects():
        ofile = open(outfilename, "w")
        writer = csv.writer(ofile, dialect=dialect)
        writer.writerow(self.header)

        for row in self.storealt:
            writer.writerow(row)

        ofile.close()

    else:
        raise TypeError("Dialect is unknown. See csv.list_dialects() for all available dialects.")
Example #51
def read_config():
    """Reads the general config file and returns the resulting config object.

    """
    config = {'camera-dev': '0',
              'save-filename-pattern': 'exam-{student-id}-{seq-number}.png',
              'csv-dialect': 'tabs'}
    parser = ConfigParser.SafeConfigParser()
    parser.read([os.path.expanduser('~/.eyegrade.cfg'),
                 os.path.expanduser('~/.camgrade.cfg')])
    if 'default' in parser.sections():
        for option in parser.options('default'):
            config[option] = parser.get('default', option)
    if not config['csv-dialect'] in csv.list_dialects():
        config['csv-dialect'] = 'tabs'
    if 'error-logging' in config and config['error-logging'] == 'yes':
        config['error-logging'] = True
    else:
        config['error-logging'] = False
    config['camera-dev'] = int(config['camera-dev'])
    return config
Example #52
 def write_directory(self, tic_dat, dir_path, allow_overwrite = False, dialect='excel',
                     write_header = True):
     """
     write the ticDat data to a collection of csv files
     :param tic_dat: the data object
     :param dir_path: the directory in which to write the csv files
     :param allow_overwrite: boolean - are we allowed to overwrite existing
                             files?
     :param dialect: the csv dialect. Consult csv documentation for details.
     :param write_header: Boolean. Should the header information be written
                          as the first row?
     :return:
     """
     verify(dialect in csv.list_dialects(), "Invalid dialect %s"%dialect)
     verify(not os.path.isfile(dir_path), "A file is not a valid directory path")
     tdf = self.tic_dat_factory
     msg = []
     if not self.tic_dat_factory.good_tic_dat_object(tic_dat, lambda m : msg.append(m)) :
         raise TicDatError("Not a valid TicDat object for this schema : " + " : ".join(msg))
     if not allow_overwrite:
         for t in tdf.all_tables :
             f = os.path.join(dir_path, t + ".csv")
             verify(not os.path.exists(f), "The %s path exists and overwrite is not allowed"%f)
     if not os.path.isdir(dir_path) :
         os.mkdir(dir_path)
     for t in tdf.all_tables :
         f = os.path.join(dir_path, t + ".csv")
         with open(f, 'w') as csvfile:
              writer = csv.DictWriter(csvfile,dialect=dialect, fieldnames=
                     tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ()) )
              writer.writeheader() if write_header else None
              _t =  getattr(tic_dat, t)
              if dictish(_t) :
                  for p_key, data_row in _t.items() :
                      primaryKeyDict = {f:v for f,v in zip(tdf.primary_key_fields[t],
                                         p_key if containerish(p_key) else (p_key,))}
                      writer.writerow(dict(data_row, **primaryKeyDict))
              else :
                  for data_row in (_t if containerish(_t) else _t()) :
                      writer.writerow(dict(data_row))
Example #53
 def get_duplicates(self, dir_path, dialect='excel', headers_present = True):
     """
     Find the row counts indexed by primary key for duplicated primary key records.
     :param dir_path: the directory containing .csv files.
     :param dialect: the csv dialect. Consult csv documentation for details.
     :param headers_present: Boolean. Does the first row of data contain
                             the column headers?
     :return: A dictionary whose keys are the table names for the primary key tables. Each value
              of the return dictionary is itself a dictionary. The inner dictionary is keyed by the
              primary key values encountered in the table, and the value is the count of records in the
               csv file with this primary key. Row counts smaller than 2 are pruned off, as they
              aren't duplicates
     caveats: Missing files resolve to an empty table, but missing fields (data or primary key) on
              matching files throw an Exception.
     """
     verify(dialect in csv.list_dialects(), "Invalid dialect %s"%dialect)
     verify(os.path.isdir(dir_path), "Invalid directory path %s"%dir_path)
     tdf = self.tic_dat_factory
     rtn = {t:defaultdict(int) for t,_ in tdf.primary_key_fields.items()
            if _ and os.path.isfile(os.path.join(dir_path, t + ".csv"))}
     for t in rtn:
         if not headers_present:
             self._verify_fields_by_cnt(dir_path, t, dialect)
         fieldnames=tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())
         dict_rdr_args = dict({"fieldnames":fieldnames} if not headers_present else{},
                          **{"dialect": dialect})
         with open(os.path.join(dir_path, t + ".csv")) as csvfile:
             for r in csv.DictReader(csvfile, **dict_rdr_args) :
                 verify(set(r.keys()).issuperset(fieldnames),
                        "Failed to find the required field names for %s"%t)
                 p_key = _try_float(r[tdf.primary_key_fields[t][0]]) \
                         if len(tdf.primary_key_fields[t])==1 else \
                         tuple(_try_float(r[_]) for _ in tdf.primary_key_fields[t])
                 rtn[t][p_key] += 1
     for t in rtn.keys():
         rtn[t] = {k:v for k,v in rtn[t].items() if v > 1}
         if not rtn[t]:
             del(rtn[t])
     return rtn
Example #54
def main():
    parser = argparse.ArgumentParser(description='Export form data '
                                                 'to a CSV file')
    parser.add_argument('-o', '--outfile',
                        help='output to a CSV file instead of stdout')
    parser.add_argument('-n', '--noheader', action='store_true',
                        help='Do not output field names on the first line')
    parser.add_argument('-d', '--dialect', choices=csv.list_dialects(),
                        default='excel-tab', help="CSV dialect to output "
                        "(default %(default)s)")
    parser.add_argument('formid',
                        help="id of form whose data should be extracted")
    parser.add_argument('field', nargs="+",
                        help="list of fields that should be exported")
    args = parser.parse_args()

    data = export_data(args.formid, *args.field)
    heading = None if args.noheader else args.field
    if args.outfile:
        outf = open(args.outfile, 'w', newline='', encoding='utf-8')
    else:
        outf = sys.stdout
    with outf:
        write_csv(outf, data, args.dialect, heading)
Example #55
def get_program_args():
    parser = argparse.ArgumentParser(description='Returns JIRA issues associated with a given user')
    parser.add_argument('usernames', nargs='+', help='At least one JIRA username must be specified.')

    parser.add_argument('-s', '--startDate', dest='start_date', type=valid_date, default=date(1990, 1, 1),
                        metavar='YYYY-MM-DD', help="The date from which JIRAs are returned.")

    parser.add_argument('-e', '--endDate', dest='end_date', type=valid_date, default=date.today(), metavar='YYYY-MM-DD',
                        help="The date of the most recent JIRA to be returned.")

    parser.add_argument('-d', '--domains', nargs='+', choices=JIRA_LOCATIONS.keys(), default=JIRA_LOCATIONS,
                        help="A list of the JIRA keys associated with the domain(s) that should be searched.")

    parser.add_argument('-jl', '--jira-limit', dest='jira_limit', type=check_negative_int, default=50,
                        help="The maximum number of JIRA issues that will be returned for each domain.")

    parser.add_argument('--lifo', dest='order', action='store_const', const='DESC', default='ASC',
                        help="JIRA issues are output from the most recently updated issue.")

    parser.add_argument('--no-ascii', dest='ascii', action='store_false',
                        help="JIRA issues will not be output to the console.")

    parser.add_argument('-c', '--csv', dest='csv', nargs='?', type=str, choices=csv.list_dialects(), const='excel',
                        help="JIRA issues are output to a csv file 'jira.csv'. This flag takes an optional keyword "
                             "parameter that determines the format of the generated csv file.")

    args = parser.parse_args()

    # Convert list of domain keys into OrderedDict
    if isinstance(args.domains, list):
        domains = OrderedDict()
        for domain_key in args.domains:
            domains[domain_key] = JIRA_LOCATIONS[domain_key]
        args.domains = domains

    return args
Example #56
def parse_args():
    parser = argparse.ArgumentParser(description='Converts a CSV file to an OSM file')
    parser.add_argument('csv_file', help='CSV file to read')
    parser.add_argument('output_file', help='Output file name')
    parser.add_argument('--csv-dialect', default='excel',
        help='The csv dialect, i.e. the algorithm used to interpret the textual data. '
             'Can be one of: ' + ', '.join(csv.list_dialects()))
    parser.add_argument('--csv-encoding', default='utf-8',
            help='Character encoding of the CSV file. Examples: utf-8, latin1')
    parser.add_argument('--lon', dest='longitude_field', default='longitude',
        help='Name of the field that contains the longitude')
    parser.add_argument('--lat', dest='latitude_field', default='latitude',
        help='Name of the field that contains the latitude')
    parser.add_argument('--src-proj', dest='source_projection', default=WGS84_PROJECTION,
        help='Projection of the latitude and longitude fields, defined '
             'according to PROJ.4 syntax. Only specify if the projection '
             'is not the standard GPS/WGS84 projection (defined in PROJ.4 as '
             '\'epsg:4326\'). In case of doubt, ignore this parameter. '
             'Requires the pyproj library.')
    parser.add_argument('--translator', help='Python file to import that '
        'contains special translation methods to transform the tags.')
    parser.add_argument('-f', '--force', dest='force_overwrite', action='store_true',
        help='Force overwriting the destination file.')
    return parser.parse_args()
Example #57
# csv_list_dialects.py
import csv

print(csv.list_dialects())
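# On a stock CPython 3 interpreter this typically prints:
# ['excel', 'excel-tab', 'unix']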
Example #58
 def test_pickle(self):
     for name in csv.list_dialects():
         dialect = csv.get_dialect(name)
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
             self.assertRaises(TypeError, pickle.dumps, dialect, proto)
Example #59
 def test_copy(self):
     for name in csv.list_dialects():
         dialect = csv.get_dialect(name)
         self.assertRaises(TypeError, copy.copy, dialect)
Example #60
        for line in xrange(nlines):
            records_in_line = STATIS.readline().split()
            for record in records_in_line:
                records[row].append(float(record))

    # return values
    return records
  

class unix_tab(csv.excel_tab):
    """Describe the usual properties of Excel-generated TAB-delimited files."""
    delimiter = '\t'
    lineterminator = '\n'
csv.register_dialect("unix-tab", unix_tab)  

dialects_list = ', '.join(map(str, csv.list_dialects()))

# initialize parser
parser = argparse.ArgumentParser(description='Converts a DL_POLY 4 STATIS file to table format.')
parser.add_argument('STATIS', nargs='?', type=str, help='filename of STATIS file. Default: %(default)s', default='STATIS')
parser.add_argument('-o', '--out', metavar='CSV', dest='CSV', type=str, help='filename for CSV output. Default: %(default)s', default='STATIS.csv')
parser.add_argument('-d', '--dialect', dest='dialect_name', type=str, help='File output format: ' + dialects_list + '. Default: %(default)s', default='unix-tab')

# parse arguments
args = parser.parse_args()
 
# read records from STATIS file
with open(args.STATIS, 'r') as STATIS:
    records = read_STATIS(STATIS)

# write records to CSV file