def get_value(parsable_out, division, section, subsection, key):
    """ Gets the value (right-most field) out of gristle_determinator
        parsable output given the key values for the rest of the fields.

        Args:
            parsable_out: text holding one pipe-delimited, double-quoted,
                5-field record per line; empty lines are skipped.
            division, section, subsection, key: values that must equal the
                first four fields of a record for it to match.
        Returns:
            The fifth field of the first matching record, or None when no
            record matches.
        Raises:
            AssertionError: if a non-empty record does not have 5 fields.
    """
    # The format is described on a private subclass.  Assigning these
    # attributes onto csv.Dialect itself would alter the shared stdlib base
    # class for every other user of the csv module in the process.
    # Attributes not set here keep the base-class placeholders.
    class ParsableDialect(csv.Dialect):
        delimiter = '|'
        quoting = csv.QUOTE_ALL
        quotechar = '"'
        lineterminator = '\n'

    wanted = [division, section, subsection, key]

    for record in csv.reader(parsable_out.split('\n'), dialect=ParsableDialect):
        if not record:
            continue
        assert len(record) == 5, \
            'expected 5 fields, got %d: %r' % (len(record), record)
        if record[:4] == wanted:
            return record[4]

    return None
Esempio n. 2
0
def get_value(parsable_out, division, section, subsection, key):
    """ Gets the value (right-most field) out of gristle_determinator
        parsable output given the key values for the rest of the fields.

        Args:
            parsable_out: text holding one pipe-delimited, double-quoted,
                5-field record per line; empty lines are skipped.
            division, section, subsection, key: values that must equal the
                first four fields of a record for it to match.
        Returns:
            The fifth field of the first matching record, or None when no
            record matches.
        Raises:
            AssertionError: if a non-empty record does not have 5 fields.
    """
    # A local subclass carries the format settings so that csv.Dialect,
    # the base class shared by the whole process, is never modified.
    # Attributes not set here keep the base-class placeholders.
    class ParsableDialect(csv.Dialect):
        delimiter = '|'
        quoting = csv.QUOTE_ALL
        quotechar = '"'
        lineterminator = '\n'

    lookup = (division, section, subsection, key)
    reader = csv.reader(parsable_out.split('\n'), dialect=ParsableDialect)

    for record in reader:
        if not record:
            continue
        assert len(record) == 5, \
            'expected 5 fields, got %d: %r' % (len(record), record)
        if tuple(record[:4]) == lookup:
            return record[4]

    return None
Esempio n. 3
0
    def setup_method(self, method):
        """ Runs gristle_determinator with --read-limit 4 against a
            generated 19-record, pipe-delimited file and loads its parsable
            output into two dictionaries:
              - self.file_struct:  key -> value for file_analysis_results
              - self.field_struct: section -> subsection -> key -> value
                for field_analysis_results
        """
        self.tmp_dir = tempfile.mkdtemp(prefix='datagristle_deter_')
        recs = [['Alabama', '8', '18'],
                ['Alaska', '6', '16'],
                ['Arizona', '6', '14'],
                ['Arkansas', '2', '12'],
                ['California', '19', '44'],
                ['Colorado', '19', '44'],
                ['Illinois', '19', '44'],
                ['Indiana', '19', '44'],
                ['Kansas', '19', '44'],
                ['Kentucky', '19', '44'],
                ['Louisiana', '19', '44'],
                ['Maine', '19', '44'],
                ['Mississippi', '19', '44'],
                ['Nebraska', '19', '44'],
                ['Oklahoma', '19', '44'],
                ['Tennessee', '19', '44'],
                ['Texas', '19', '9999'],
                ['Virginia', '19', '44'],
                ['West Virginia', '19', '44']]
        self.file_struct = {}
        self.field_struct = {}

        fqfn = generate_test_file(delim='|',
                                  rec_list=recs,
                                  quoted=False,
                                  dir_name=self.tmp_dir)
        cmd = '%s %s --read-limit 4 --outputformat=parsable' % (os.path.join(
            script_path, 'gristle_determinator'), fqfn)
        runner = envoy.run(cmd)
        print(runner.std_out)
        print(runner.std_err)
        assert runner.status_code == 0

        # The output format is described on a private subclass.  Assigning
        # these attributes onto csv.Dialect itself would alter the shared
        # stdlib base class for every other test in the process.
        class ParsableDialect(csv.Dialect):
            delimiter = '|'
            quoting = file_type.get_quote_number('QUOTE_ALL')
            quotechar = '"'
            lineterminator = '\n'

        csvobj = csv.reader(runner.std_out.split('\n'), dialect=ParsableDialect)
        for record in csvobj:
            if not record:
                continue
            assert len(record) == 5
            division, section, subsection, key, value = record

            assert division in [
                'file_analysis_results', 'field_analysis_results'
            ]

            if division == 'file_analysis_results':
                assert section == 'main'
                assert subsection == 'main'
                self.file_struct[key] = value
            elif division == 'field_analysis_results':
                assert 'field_' in section
                assert subsection in ['main', 'top_values']
                # Create the section and subsection levels on first use.
                by_subsection = self.field_struct.setdefault(section, {})
                by_subsection.setdefault(subsection, {})[key] = value
Esempio n. 4
0
        def overrider(dialect):
            """ Applies the delimiter, quoting, quotechar and has_header
                overrides (names defined outside this function) on top of
                dialect, forces its lineterminator to a newline, and returns
                the same dialect object.

                An override that is falsy (has_header: None) leaves the
                dialect's existing value in place.
            """
            dialect.delimiter = delimiter or dialect.delimiter

            if quoting:
                dialect.quoting = file_type.get_quote_number(quoting)
            elif not dialect.quoting:
                # NOTE(review): a dialect quoting value of 0 is falsy too, so
                # it is also replaced by quote_none here - confirm intended.
                dialect.quoting = file_type.get_quote_number('quote_none')

            dialect.quotechar = quotechar or dialect.quotechar
            try:
                dialect.has_header = has_header if has_header is not None else dialect.has_header
            except AttributeError:
                # No override given and the dialect carries no has_header.
                dialect.has_header = False
            dialect.lineterminator = '\n'
            return dialect
    def setup_method(self, method):
        """ Runs gristle_determinator against a generated 5-record,
            pipe-delimited file and loads its parsable output into two
            dictionaries:
              - self.file_struct:  key -> value for file_analysis_results
              - self.field_struct: section -> subsection -> key -> value
                for field_analysis_results
        """
        self.tmp_dir = tempfile.mkdtemp(prefix='datagristle_deter_')
        recs = [['Alabama', '8', '18'],
                ['Alaska', '6', '16'],
                ['Arizona', '6', '14'],
                ['Arkansas', '2', '12'],
                ['California', '19', '44']]
        self.file_struct = {}
        self.field_struct = {}

        fqfn = generate_test_file(delim='|', rec_list=recs, quoted=False, dir_name=self.tmp_dir)
        cmd = '%s %s --outputformat=parsable' % (os.path.join(script_path, 'gristle_determinator'), fqfn)
        runner = envoy.run(cmd)
        print(runner.std_out)
        print(runner.std_err)
        assert runner.status_code == 0

        # The output format is described on a private subclass.  Assigning
        # these attributes onto csv.Dialect itself would alter the shared
        # stdlib base class for every other test in the process.
        class ParsableDialect(csv.Dialect):
            delimiter = '|'
            quoting = file_type.get_quote_number('QUOTE_ALL')
            quotechar = '"'
            lineterminator = '\n'

        csvobj = csv.reader(runner.std_out.split('\n'), dialect=ParsableDialect)
        # NOTE(review): this is handed the reader object itself, not its
        # records - presumably leftover debugging output; confirm and remove.
        pp(csvobj)
        for record in csvobj:
            if not record:
                continue
            assert len(record) == 5
            division, section, subsection, key, value = record

            assert division in ['file_analysis_results', 'field_analysis_results']

            if division == 'file_analysis_results':
                assert section == 'main'
                assert subsection == 'main'
                self.file_struct[key] = value
            elif division == 'field_analysis_results':
                assert 'field_' in section
                assert subsection in ['main', 'top_values']
                # Create the section and subsection levels on first use.
                by_subsection = self.field_struct.setdefault(section, {})
                by_subsection.setdefault(subsection, {})[key] = value
Esempio n. 6
0
 def test_none(self):
     """ Expects get_quote_number to raise ValueError when given None. """
     missing_name = None
     with pytest.raises(ValueError):
         mod.get_quote_number(missing_name)
Esempio n. 7
0
 def test_nonmatch(self):
     """ Expects get_quote_number to raise ValueError for the name
         'quote_alot'.
     """
     unknown_name = 'quote_alot'
     with pytest.raises(ValueError):
         mod.get_quote_number(unknown_name)
Esempio n. 8
0
 def test_number(self):
     """ Expects get_quote_number to raise ValueError for numeric input,
         given either as the int 3 or as the string '3'.
     """
     # The calls are made bare: a comparison wrapped around them could
     # never be evaluated once the expected ValueError is raised.
     with pytest.raises(ValueError):
         mod.get_quote_number(3)
     with pytest.raises(ValueError):
         mod.get_quote_number('3')
Esempio n. 9
0
 def test_uppercase(self):
     """ Expects 'QUOTE_MINIMAL' to yield the same number as
         'quote_minimal'.
     """
     from_lower = mod.get_quote_number('quote_minimal')
     from_upper = mod.get_quote_number('QUOTE_MINIMAL')
     assert from_lower == from_upper
Esempio n. 10
0
 def test_lowercase(self):
     """ Expects each lowercase quoting name to map to its number. """
     expected_numbers = (('quote_minimal', 0),
                         ('quote_all', 1),
                         ('quote_none', 3),
                         ('quote_nonnumeric', 2))
     for quote_name, quote_number in expected_numbers:
         assert mod.get_quote_number(quote_name) == quote_number
Esempio n. 11
0
 def test_none(self):
     """ Expects get_quote_number to raise ValueError when given None. """
     missing_name = None
     with pytest.raises(ValueError):
         mod.get_quote_number(missing_name)
Esempio n. 12
0
 def test_nonmatch(self):
     """ Expects get_quote_number to raise ValueError for the name
         'quote_alot'.
     """
     unknown_name = 'quote_alot'
     with pytest.raises(ValueError):
         mod.get_quote_number(unknown_name)
Esempio n. 13
0
 def test_number(self):
     """ Expects get_quote_number to raise ValueError for numeric input,
         given either as the int 3 or as the string '3'.
     """
     # The calls are made bare: a comparison wrapped around them could
     # never be evaluated once the expected ValueError is raised.
     with pytest.raises(ValueError):
         mod.get_quote_number(3)
     with pytest.raises(ValueError):
         mod.get_quote_number('3')
Esempio n. 14
0
 def test_uppercase(self):
     """ Expects 'QUOTE_MINIMAL' to yield the same number as
         'quote_minimal'.
     """
     from_lower = mod.get_quote_number('quote_minimal')
     from_upper = mod.get_quote_number('QUOTE_MINIMAL')
     assert from_lower == from_upper
Esempio n. 15
0
 def test_lowercase(self):
     """ Expects each lowercase quoting name to map to its number. """
     expected_numbers = (('quote_minimal', 0),
                         ('quote_all', 1),
                         ('quote_none', 3),
                         ('quote_nonnumeric', 2))
     for quote_name, quote_number in expected_numbers:
         assert mod.get_quote_number(quote_name) == quote_number
    def setup_method(self, method):
        """ Runs gristle_determinator with --max-freq 10 against a generated
            19-record, pipe-delimited file and loads its parsable output
            into two dictionaries:
              - self.file_struct:  key -> value for file_analysis_results
              - self.field_struct: section -> subsection -> key -> value
                for field_analysis_results

            Output rows that are not 5 fields long are skipped when they
            carry the 'freq dict is too large' warning and fail the test
            otherwise.
        """
        self.tmp_dir = tempfile.mkdtemp(prefix='datagristle_deter_')
        recs = [['Alabama', '8', '18'],
                ['Alaska', '6', '16'],
                ['Arizona', '6', '14'],
                ['Arkansas', '2', '12'],
                ['California', '19', '44'],
                ['Colorado', '19', '44'],
                ['Illinois', '19', '44'],
                ['Indiana', '19', '44'],
                ['Kansas', '19', '44'],
                ['Kentucky', '19', '44'],
                ['Louisiana', '19', '44'],
                ['Maine', '19', '44'],
                ['Mississippi', '19', '44'],
                ['Nebraska', '19', '44'],
                ['Oklahoma', '19', '44'],
                ['Tennessee', '19', '44'],
                ['Texas', '19', '9999'],
                ['Virginia', '19', '44'],
                ['West Virginia', '19', '44']]
        self.file_struct = {}
        self.field_struct = {}

        fqfn = generate_test_file(delim='|', rec_list=recs, quoted=False, dir_name=self.tmp_dir)
        cmd = '%s %s --max-freq 10  --outputformat=parsable' % (os.path.join(script_path, 'gristle_determinator'), fqfn)
        runner = envoy.run(cmd)
        print(runner.std_out)
        print(runner.std_err)
        assert runner.status_code == 0

        # The output format is described on a private subclass.  Assigning
        # these attributes onto csv.Dialect itself would alter the shared
        # stdlib base class for every other test in the process.
        class ParsableDialect(csv.Dialect):
            delimiter = '|'
            quoting = file_type.get_quote_number('QUOTE_ALL')
            quotechar = '"'
            lineterminator = '\n'

        csvobj = csv.reader(runner.std_out.split('\n'), dialect=ParsableDialect)
        for record in csvobj:
            if not record:
                continue
            if len(record) != 5:
                if 'WARNING: freq dict is too large' in record[0]:
                    continue  # ignore warning row
                pytest.fail('Invalid result record: %s' % record[0])

            division, section, subsection, key, value = record
            assert division in ['file_analysis_results', 'field_analysis_results']

            if division == 'file_analysis_results':
                assert section == 'main'
                assert subsection == 'main'
                self.file_struct[key] = value
            elif division == 'field_analysis_results':
                assert 'field_' in section
                assert subsection in ['main', 'top_values']
                # Create the section and subsection levels on first use.
                by_subsection = self.field_struct.setdefault(section, {})
                by_subsection.setdefault(subsection, {})[key] = value