def get_value(parsable_out, division, section, subsection, key):
    """ Gets the value (right-most field) out of gristle_determinator
        parsable output given the key values for the rest of the fields.

        Args:
            parsable_out: text of the parsable output; it is split on
                newlines and each line is parsed as a pipe-delimited record
                using '"' as the quote character.
            division, section, subsection, key: values that the first four
                fields of a record must equal for the record to match.
        Returns:
            The fifth field of the first matching record, or None if no
            record matches.
        Raises:
            AssertionError: if a non-empty record does not have 5 fields.
    """

    # Use a private subclass rather than assigning attributes on csv.Dialect
    # itself: csv.Dialect is a class shared by the whole process, so mutating
    # it would leak these settings to every other user of the csv module.
    # Attributes not set here keep the values inherited from csv.Dialect.
    class ParsableDialect(csv.Dialect):
        delimiter = '|'
        quoting = csv.QUOTE_ALL
        quotechar = '"'
        lineterminator = '\n'

    wanted = [division, section, subsection, key]
    csvobj = csv.reader(parsable_out.split('\n'), dialect=ParsableDialect)
    for record in csvobj:
        if not record:
            continue  # blank line, e.g. the one produced by a trailing newline
        assert len(record) == 5
        if record[:4] == wanted:
            return record[4]
    return None
def setup_method(self, method):
    """Run gristle_determinator with --read-limit 4 and index its output.

    Writes a pipe-delimited, unquoted test file of 19 records into a fresh
    temp directory (stored in self.tmp_dir), runs the script against it with
    parsable output, and loads the results into:
        self.file_struct[key] = value
        self.field_struct[section][subsection][key] = value

    Args:
        method: the test method about to run; not used here.
    """
    self.tmp_dir = tempfile.mkdtemp(prefix='datagristle_deter_')
    recs = [['Alabama', '8', '18'],
            ['Alaska', '6', '16'],
            ['Arizona', '6', '14'],
            ['Arkansas', '2', '12'],
            ['California', '19', '44'],
            ['Colorado', '19', '44'],
            ['Illinois', '19', '44'],
            ['Indiana', '19', '44'],
            ['Kansas', '19', '44'],
            ['Kentucky', '19', '44'],
            ['Louisiana', '19', '44'],
            ['Maine', '19', '44'],
            ['Mississippi', '19', '44'],
            ['Nebraska', '19', '44'],
            ['Oklahoma', '19', '44'],
            ['Tennessee', '19', '44'],
            ['Texas', '19', '9999'],
            ['Virginia', '19', '44'],
            ['West Virginia', '19', '44']]
    self.file_struct = {}
    self.field_struct = {}

    fqfn = generate_test_file(delim='|', rec_list=recs, quoted=False,
                              dir_name=self.tmp_dir)
    cmd = '%s %s --read-limit 4 --outputformat=parsable' % (
        os.path.join(script_path, 'gristle_determinator'), fqfn)
    runner = envoy.run(cmd)
    print(runner.std_out)
    print(runner.std_err)
    assert runner.status_code == 0

    # Use a private subclass rather than assigning attributes on csv.Dialect
    # itself: csv.Dialect is shared by the whole process, so mutating it
    # would leak these settings into every other test using the csv module.
    class ParsableDialect(csv.Dialect):
        delimiter = '|'
        quoting = csv.QUOTE_ALL
        quotechar = '"'
        lineterminator = '\n'

    csvobj = csv.reader(runner.std_out.split('\n'), dialect=ParsableDialect)
    for record in csvobj:
        if not record:
            continue  # blank line in the output
        assert len(record) == 5
        division, section, subsection, key, value = record

        assert division in ['file_analysis_results', 'field_analysis_results']

        if division == 'file_analysis_results':
            assert section == 'main'
            assert subsection == 'main'
            self.file_struct[key] = value
        elif division == 'field_analysis_results':
            assert 'field_' in section
            assert subsection in ['main', 'top_values']
            # Create the nested section/subsection dicts on first use.
            by_subsection = self.field_struct.setdefault(section, {})
            by_subsection.setdefault(subsection, {})[key] = value
def overrider(dialect):
    """Apply the override values to a dialect in place and return it.

    The names delimiter, quoting, quotechar and has_header are not
    parameters; they are read from the enclosing scope, which is not
    visible in this block.

    Rules applied:
        - delimiter and quotechar replace the dialect's values only when
          the override is truthy.
        - quoting, when truthy, is translated by file_type.get_quote_number;
          otherwise a falsy dialect.quoting is set to the 'quote_none'
          number and a truthy one is left alone.
        - has_header replaces the dialect's value unless it is None; a
          dialect without a has_header attribute gets False.
        - lineterminator is always set to a newline.
    """
    dialect.delimiter = delimiter or dialect.delimiter

    if quoting:
        dialect.quoting = file_type.get_quote_number(quoting)
    elif not dialect.quoting:
        # NOTE(review): if dialect.quoting holds csv quoting constants,
        # csv.QUOTE_MINIMAL is 0 and therefore falsy, so a dialect already
        # set to minimal quoting is also switched to quote_none here --
        # confirm that this is intended.
        dialect.quoting = file_type.get_quote_number('quote_none')

    dialect.quotechar = quotechar or dialect.quotechar

    if has_header is not None:
        dialect.has_header = has_header
    else:
        # Dialects that never had has_header set default to False.
        dialect.has_header = getattr(dialect, 'has_header', False)

    dialect.lineterminator = '\n'
    return dialect
def setup_method(self, method):
    """Run gristle_determinator with default options and index its output.

    Writes a pipe-delimited, unquoted test file of 5 records into a fresh
    temp directory (stored in self.tmp_dir), runs the script against it with
    parsable output, and loads the results into:
        self.file_struct[key] = value
        self.field_struct[section][subsection][key] = value

    Args:
        method: the test method about to run; not used here.
    """
    self.tmp_dir = tempfile.mkdtemp(prefix='datagristle_deter_')
    recs = [['Alabama', '8', '18'],
            ['Alaska', '6', '16'],
            ['Arizona', '6', '14'],
            ['Arkansas', '2', '12'],
            ['California', '19', '44']]
    self.file_struct = {}
    self.field_struct = {}

    fqfn = generate_test_file(delim='|', rec_list=recs, quoted=False,
                              dir_name=self.tmp_dir)
    cmd = '%s %s --outputformat=parsable' % (
        os.path.join(script_path, 'gristle_determinator'), fqfn)
    runner = envoy.run(cmd)
    print(runner.std_out)
    print(runner.std_err)
    assert runner.status_code == 0

    # Use a private subclass rather than assigning attributes on csv.Dialect
    # itself: csv.Dialect is shared by the whole process, so mutating it
    # would leak these settings into every other test using the csv module.
    class ParsableDialect(csv.Dialect):
        delimiter = '|'
        quoting = csv.QUOTE_ALL
        quotechar = '"'
        lineterminator = '\n'

    csvobj = csv.reader(runner.std_out.split('\n'), dialect=ParsableDialect)
    for record in csvobj:
        if not record:
            continue  # blank line in the output
        assert len(record) == 5
        division, section, subsection, key, value = record

        assert division in ['file_analysis_results', 'field_analysis_results']

        if division == 'file_analysis_results':
            assert section == 'main'
            assert subsection == 'main'
            self.file_struct[key] = value
        elif division == 'field_analysis_results':
            assert 'field_' in section
            assert subsection in ['main', 'top_values']
            # Create the nested section/subsection dicts on first use.
            by_subsection = self.field_struct.setdefault(section, {})
            by_subsection.setdefault(subsection, {})[key] = value
def test_none(self):
    """get_quote_number must reject None by raising ValueError."""
    invalid_name = None
    with pytest.raises(ValueError):
        mod.get_quote_number(invalid_name)
def test_nonmatch(self):
    """A name that matches no quoting style must raise ValueError."""
    unknown_name = 'quote_alot'
    with pytest.raises(ValueError):
        mod.get_quote_number(unknown_name)
def test_number(self):
    """Numeric input, as an int or as a digit string, must raise ValueError.

    The calls are made bare inside pytest.raises: wrapping them in an
    ``assert ... == 3`` would suggest a return value is being checked, but
    no comparison can happen once the expected exception is raised.
    """
    for numeric_name in (3, '3'):
        with pytest.raises(ValueError):
            mod.get_quote_number(numeric_name)
def test_uppercase(self):
    """Lower- and upper-case spellings of a quote name give the same number."""
    from_lower = mod.get_quote_number('quote_minimal')
    from_upper = mod.get_quote_number('QUOTE_MINIMAL')
    assert from_lower == from_upper
def test_lowercase(self):
    """Each lower-case quote name maps to its expected quote number."""
    expected_numbers = (
        ('quote_minimal', 0),
        ('quote_all', 1),
        ('quote_none', 3),
        ('quote_nonnumeric', 2),
    )
    for quote_name, number in expected_numbers:
        assert mod.get_quote_number(quote_name) == number
def setup_method(self, method):
    """Run gristle_determinator with --max-freq 10 and index its output.

    Writes a pipe-delimited, unquoted test file of 19 records into a fresh
    temp directory (stored in self.tmp_dir), runs the script against it with
    parsable output, and loads the results into:
        self.file_struct[key] = value
        self.field_struct[section][subsection][key] = value

    Output rows that do not have 5 fields are skipped when they carry the
    'freq dict is too large' warning; any other such row fails the test.

    Args:
        method: the test method about to run; not used here.
    """
    self.tmp_dir = tempfile.mkdtemp(prefix='datagristle_deter_')
    recs = [['Alabama', '8', '18'],
            ['Alaska', '6', '16'],
            ['Arizona', '6', '14'],
            ['Arkansas', '2', '12'],
            ['California', '19', '44'],
            ['Colorado', '19', '44'],
            ['Illinois', '19', '44'],
            ['Indiana', '19', '44'],
            ['Kansas', '19', '44'],
            ['Kentucky', '19', '44'],
            ['Louisiana', '19', '44'],
            ['Maine', '19', '44'],
            ['Mississippi', '19', '44'],
            ['Nebraska', '19', '44'],
            ['Oklahoma', '19', '44'],
            ['Tennessee', '19', '44'],
            ['Texas', '19', '9999'],
            ['Virginia', '19', '44'],
            ['West Virginia', '19', '44']]
    self.file_struct = {}
    self.field_struct = {}

    fqfn = generate_test_file(delim='|', rec_list=recs, quoted=False,
                              dir_name=self.tmp_dir)
    cmd = '%s %s --max-freq 10 --outputformat=parsable' % (
        os.path.join(script_path, 'gristle_determinator'), fqfn)
    runner = envoy.run(cmd)
    print(runner.std_out)
    print(runner.std_err)
    assert runner.status_code == 0

    # Use a private subclass rather than assigning attributes on csv.Dialect
    # itself: csv.Dialect is shared by the whole process, so mutating it
    # would leak these settings into every other test using the csv module.
    class ParsableDialect(csv.Dialect):
        delimiter = '|'
        quoting = csv.QUOTE_ALL
        quotechar = '"'
        lineterminator = '\n'

    csvobj = csv.reader(runner.std_out.split('\n'), dialect=ParsableDialect)
    for record in csvobj:
        if not record:
            continue  # blank line in the output
        if len(record) != 5:
            if 'WARNING: freq dict is too large' in record[0]:
                continue  # ignore warning row
            pytest.fail('Invalid result record: %s' % record[0])
        division, section, subsection, key, value = record

        assert division in ['file_analysis_results', 'field_analysis_results']

        if division == 'file_analysis_results':
            assert section == 'main'
            assert subsection == 'main'
            self.file_struct[key] = value
        elif division == 'field_analysis_results':
            assert 'field_' in section
            assert subsection in ['main', 'top_values']
            # Create the nested section/subsection dicts on first use.
            by_subsection = self.field_struct.setdefault(section, {})
            by_subsection.setdefault(subsection, {})[key] = value