def test_text_file_filter(self):
    """Exercise text_file_filter() with termname 'year' and value '1990'.

    The inputs come from the test framework (input file, report file name
    and workspace). The test asserts that:
      * the response reports success,
      * the output file named in the response exists,
      * exactly 5 rows of the output file have 'year' equal to '1990',
      * count_rows() returns 7 for the output file.
    """
    print('testing text_file_filter')
    testinputfile = self.framework.testinputfile
    testreportfile = self.framework.testreportfile
    workspace = self.framework.testdatapath
    termname = 'year'
    matchingvalue = '1990'

    inputs = {
        'inputfile': testinputfile,
        'termname': termname,
        'matchingvalue': matchingvalue,
        'workspace': workspace,
        'outputfile': testreportfile,
    }

    # Create the report
    response = text_file_filter(inputs)
    success = response['success']
    s = 'text file filter failed: %s' % response['message']
    self.assertTrue(success, s)

    # Trust the location reported by the function rather than rebuilding
    # the path from workspace and file name here.
    outputfile = response['outputfile']
    s = 'Output file %s not created' % outputfile
    self.assertTrue(os.path.isfile(outputfile), s)

    header = read_header(outputfile)
    dialect = csv_file_dialect(outputfile)
    encoding = csv_file_encoding(outputfile)

    # Count the rows of the output file whose term has the matching value
    matches = sum(
        1 for row in read_csv_row(outputfile, dialect=dialect,
                                  encoding=encoding, header=True,
                                  fieldnames=header)
        if row[termname] == matchingvalue)
    expected = 5
    s = 'Number of matches in output (%s) not as expected (%s)' % (
        matches, expected)
    self.assertEqual(matches, expected, s)

    # count_rows() is independent of matchingvalue, so the failure message
    # reports a row count rather than a match count.
    rows = count_rows(outputfile)
    expected = 7
    s = 'Number of rows in %s ' % outputfile
    s += 'was %s, not as expected (%s) ' % (rows, expected)
    self.assertEqual(rows, expected, s)
def test_utf8_encoder(self):
    """Run utf8_encoder() three times and check the output file's encoding.

    The same inputs dictionary is reused for every call:
      1. with the framework's testfile1 as input,
      2. with testfile2 as input,
      3. with testfile2 as input and an 'encoding' entry of 'mac_roman'.
    After each call csv_file_encoding() of the output location must be
    'utf-8'.
    """
    print('testing utf8_encoder')
    firstsource = self.framework.testfile1
    secondsource = self.framework.testfile2
    datapath = self.framework.testdatapath
    target = self.framework.outputfile

    inputs = {
        'inputfile': firstsource,
        'outputfile': target,
        'workspace': datapath,
    }
    written = '%s/%s' % (datapath, target)
    wanted = 'utf-8'

    def encode_and_check(source):
        # Translate the file to utf8 encoding, then inspect what was written
        utf8_encoder(inputs)
        found = csv_file_encoding(written)
        note = 'From input: %s\nFound:\n%s\nExpected:\n%s' % (
            source, found, wanted)
        self.assertEqual(found, wanted, note)

    encode_and_check(firstsource)

    inputs['inputfile'] = secondsource
    encode_and_check(secondsource)

    # Same input again, this time with an explicit encoding entry
    inputs['encoding'] = 'mac_roman'
    encode_and_check(secondsource)
def test_term_standardizer_report(self):
    """Exercise term_standardizer_report() for the 'month' key.

    Uses the framework's correction input file, correction report file and
    month vocabulary file. The test asserts that:
      * the call returns a true value,
      * the report header is ['ID', 'month', 'country', 'month_orig'],
      * in the first report row 'month_orig' is 'vi' and 'month' is '6'.
    """
    print('testing term_standardizer_report')
    testcorrectioninputfile = self.framework.testcorrectioninputfile
    testcorrectionreportfile = self.framework.testcorrectionreportfile
    testmonthvocabfile = self.framework.testmonthvocabfile
    key = 'month'

    result = term_standardizer_report(
        testcorrectioninputfile, testcorrectionreportfile,
        testmonthvocabfile, key)
    # Each fragment ends with a space so the paths stay separated in the
    # assembled failure message.
    s = 'term_standardizer_report() result not True '
    s += 'with inputfile: %s ' % testcorrectioninputfile
    s += 'outputfile: %s ' % testcorrectionreportfile
    s += 'and vocabfile: %s' % testmonthvocabfile
    self.assertTrue(result, s)

    outputheader = read_header(testcorrectionreportfile)
    expected = ['ID', 'month', 'country', 'month_orig']
    s = 'outputheader: %s not as expected: %s' % (outputheader, expected)
    self.assertEqual(outputheader, expected, s)

    dialect = csv_file_dialect(testcorrectionreportfile)
    encoding = csv_file_encoding(testcorrectionreportfile)
    rows = read_rows(testcorrectionreportfile, 1, dialect=dialect,
                     encoding=encoding, header=True,
                     fieldnames=outputheader)
    firstrow = rows[0]

    field = 'month_orig'
    value = firstrow[field]
    expected = 'vi'
    s = 'Field %s value %s not as expected (%s)' % (field, value, expected)
    self.assertEqual(value, expected, s)

    field = 'month'
    value = firstrow[field]
    expected = '6'
    s = 'Field %s value %s not as expected (%s)' % (field, value, expected)
    self.assertEqual(value, expected, s)
def test_text_file_field_stripper(self):
    """Exercise text_file_field_stripper() with 'country|stateProvince'.

    The term list is passed with '|' as its separator, together with the
    framework's input file, report file name and workspace. The test
    asserts that:
      * the response reports success,
      * the output file named in the response exists,
      * count_rows() returns 10 for the output file,
      * the output header is ['country', 'stateprovince'].
    """
    print('testing text_file_field_stripper')
    testinputfile = self.framework.testinputfile
    testreportfile = self.framework.testreportfile
    workspace = self.framework.testdatapath
    termlist = 'country|stateProvince'

    inputs = {
        'inputfile': testinputfile,
        'termlist': termlist,
        'workspace': workspace,
        'outputfile': testreportfile,
        'separator': '|',
    }

    # Create the report
    response = text_file_field_stripper(inputs)
    success = response['success']
    s = 'text file field stripper failed: %s' % response['message']
    self.assertTrue(success, s)

    # Trust the location reported by the function rather than rebuilding
    # the path from workspace and file name here.
    outputfile = response['outputfile']
    s = 'Output file %s not created' % outputfile
    self.assertTrue(os.path.isfile(outputfile), s)

    header = read_header(outputfile)

    rows = count_rows(outputfile)
    expected = 10
    s = 'Number of rows in %s ' % outputfile
    s += 'was %s, not as expected (%s) ' % (rows, expected)
    self.assertEqual(rows, expected, s)

    # NOTE(review): the expected header has 'stateprovince' in lower case
    # although the term list uses 'stateProvince'; presumably the header is
    # normalized somewhere -- confirm.
    expected = ['country', 'stateprovince']
    s = 'Header: %s, not as expected: %s' % (header, expected)
    self.assertEqual(header, expected, s)
def test_term_setter_report(self):
    """Exercise term_setter_report() in three scenarios.

    Every scenario calls term_setter_report() on the framework's setter
    input and report files, then checks the call result, the header of the
    report file and selected fields of its first row:
      1. key 'institutionCode' with constant 'CAS',
      2. key 'institutionCode|license' with constants 'CAS|CC0',
      3. key 'country' with constant 'Argentina'.
    """
    print('testing term_setter_report')
    infile = self.framework.testsetterinputfile
    outfile = self.framework.testsetterreportfile

    def run_case(key, constants, wantedheader, wantedfields):
        # Produce the report and make sure the call itself succeeded
        result = term_setter_report(infile, outfile, key,
                                    constantvalues=constants)
        note = ('term_setter_report() result not True '
                + 'with inputfile: %s ' % infile
                + 'and outputfile: %s' % outfile)
        self.assertTrue(result, note)

        # The header of the report must be exactly the expected list
        foundheader = read_header(outfile)
        note = 'outputheader: %s not as expected: %s' % (
            foundheader, wantedheader)
        self.assertEqual(foundheader, wantedheader, note)

        # Check the listed fields, in order, in the first row
        dialect = csv_file_dialect(outfile)
        encoding = csv_file_encoding(outfile)
        rows = read_rows(outfile, 1, dialect=dialect, encoding=encoding,
                         header=True, fieldnames=foundheader)
        first = rows[0]
        for field, wanted in wantedfields:
            found = first[field]
            note = 'Field %s value %s not as expected (%s)' % (
                field, found, wanted)
            self.assertEqual(found, wanted, note)

    # Test field addition
    run_case('institutionCode', 'CAS',
             ['ID', 'month', 'country', 'institutionCode'],
             [('institutionCode', 'CAS')])

    # Test field list addition
    run_case('institutionCode|license', 'CAS|CC0',
             ['ID', 'month', 'country', 'institutionCode', 'license'],
             [('institutionCode', 'CAS'), ('license', 'CC0')])

    # Test field replacement
    run_case('country', 'Argentina',
             ['ID', 'month', 'country'],
             [('country', 'Argentina')])