def test_text_file_filter(self):
        # Exercise text_file_filter() on the framework's test input file with
        # termname 'year' and matchingvalue '1990', then inspect the file the
        # response reports as its output.
        print('testing text_file_filter')
        testinputfile = self.framework.testinputfile
        testreportfile = self.framework.testreportfile
        workspace = self.framework.testdatapath
        termname = 'year'
        matchingvalue = '1990'

        inputs = {}
        inputs['inputfile'] = testinputfile
        inputs['termname'] = termname
        inputs['matchingvalue'] = matchingvalue
        inputs['workspace'] = workspace
        inputs['outputfile'] = testreportfile

        # Create the report
        response = text_file_filter(inputs)
        success = response['success']
        s = 'text file filter failed: %s' % response['message']
        self.assertTrue(success, s)

        # The location to check is taken from the response rather than being
        # rebuilt from workspace and file name here.
        outputfile = response['outputfile']
        s = 'Output file %s not created' % outputfile
        self.assertTrue(os.path.isfile(outputfile), s)

        header = read_header(outputfile)
        dialect = csv_file_dialect(outputfile)
        encoding = csv_file_encoding(outputfile)

        # Count the rows of the output file whose term holds the matching value
        matches = sum(1 for row in read_csv_row(outputfile,
                                                dialect=dialect,
                                                encoding=encoding,
                                                header=True,
                                                fieldnames=header)
                      if row[termname] == matchingvalue)
        expected = 5
        s = 'Number of matches in output (%s) not as expected (%s)' % (
            matches, expected)
        self.assertEqual(matches, expected, s)

        # count_rows() is checked separately from the per-row match count.
        # NOTE(review): exactly which lines count_rows() includes is defined
        # outside this test - confirm against its implementation.
        rowcount = count_rows(outputfile)
        expected = 7
        s = 'Number of rows in %s ' % outputfile
        s += 'was %s, not as expected (%s) ' % (rowcount, expected)
        self.assertEqual(rowcount, expected, s)
Example #2
0
    def test_utf8_encoder(self):
        # Run utf8_encoder() three times against the same output location -
        # first on testfile1, then on testfile2, then on testfile2 with an
        # explicit 'mac_roman' encoding input - and check after each run that
        # csv_file_encoding() reports 'utf-8' for the output file.
        print('testing utf8_encoder')
        testfile1 = self.framework.testfile1
        testfile2 = self.framework.testfile2
        testdatapath = self.framework.testdatapath
        outputfile = self.framework.outputfile

        inputs = {
            'inputfile': testfile1,
            'outputfile': outputfile,
            'workspace': testdatapath,
        }
        outfilelocation = '%s/%s' % (testdatapath, outputfile)
        expected = 'utf-8'

        def encode_and_check(sourcefile):
            # Translate the file to utf8 encoding, then verify the result
            utf8_encoder(inputs)
            found = csv_file_encoding(outfilelocation)
            s = 'From input: %s\nFound:\n%s\nExpected:\n%s' % (sourcefile,
                                                               found,
                                                               expected)
            self.assertEqual(found, expected, s)

        encode_and_check(testfile1)

        inputs['inputfile'] = testfile2
        encode_and_check(testfile2)

        # Same input file again, this time naming its encoding explicitly
        inputs['encoding'] = 'mac_roman'
        encode_and_check(testfile2)
    def test_term_standardizer_report(self):
        # Run term_standardizer_report() for the 'month' key using the
        # framework's correction input file and month vocabulary file, then
        # check the header and the first row of the report it writes.
        print('testing term_standardizer_report')
        testcorrectioninputfile = self.framework.testcorrectioninputfile
        testcorrectionreportfile = self.framework.testcorrectionreportfile
        testmonthvocabfile = self.framework.testmonthvocabfile

        key = 'month'
        result = term_standardizer_report(testcorrectioninputfile,
                                          testcorrectionreportfile,
                                          testmonthvocabfile,
                                          key)
        s = 'term_standardizer_report() result not True '
        s += 'with inputfile: %s ' % testcorrectioninputfile
        s += 'outputfile: %s ' % testcorrectionreportfile
        s += 'and vocabfile: %s' % testmonthvocabfile
        self.assertTrue(result, s)

        # The report is expected to carry an extra '<key>_orig' column
        outputheader = read_header(testcorrectionreportfile)
        expected = ['ID', 'month', 'country', 'month_orig']
        s = 'outputheader: %s not as expected: %s' % (outputheader, expected)
        self.assertEqual(outputheader, expected, s)

        # Only the first data row is needed for the value checks below
        dialect = csv_file_dialect(testcorrectionreportfile)
        encoding = csv_file_encoding(testcorrectionreportfile)
        rows = read_rows(testcorrectionreportfile,
                         1,
                         dialect=dialect,
                         encoding=encoding,
                         header=True,
                         fieldnames=outputheader)
        firstrow = rows[0]

        # 'month_orig' should hold 'vi' while 'month' holds '6'
        field = 'month_orig'
        value = firstrow[field]
        expected = 'vi'
        s = 'Field %s value %s not as expected (%s)' % (field, value, expected)
        self.assertEqual(value, expected, s)

        field = 'month'
        value = firstrow[field]
        expected = '6'
        s = 'Field %s value %s not as expected (%s)' % (field, value, expected)
        self.assertEqual(value, expected, s)
Example #4
0
    def test_text_file_field_stripper(self):
        # Run text_file_field_stripper() on the framework's test input file
        # with the '|'-separated term list 'country|stateProvince', then check
        # the row count and header of the file the response reports.
        print('testing text_file_field_stripper')
        testinputfile = self.framework.testinputfile
        testreportfile = self.framework.testreportfile
        workspace = self.framework.testdatapath
        termlist = 'country|stateProvince'

        inputs = {}
        inputs['inputfile'] = testinputfile
        inputs['termlist'] = termlist
        inputs['workspace'] = workspace
        inputs['outputfile'] = testreportfile
        inputs['separator'] = '|'

        # Create the report
        response = text_file_field_stripper(inputs)
        success = response['success']
        s = 'text file field stripper failed: %s' % response['message']
        self.assertTrue(success, s)

        # The location to check is taken from the response rather than being
        # rebuilt from workspace and file name here.
        outputfile = response['outputfile']
        s = 'Output file %s not created' % outputfile
        self.assertTrue(os.path.isfile(outputfile), s)

        header = read_header(outputfile)

        rows = count_rows(outputfile)
        expected = 10
        s = 'Number of rows in %s ' % outputfile
        s += 'was %s, not as expected (%s) ' % (rows, expected)
        self.assertEqual(rows, expected, s)

        # NOTE(review): the expected header is lower case ('stateprovince')
        # although the requested term is 'stateProvince' - presumably a
        # normalization done by the code under test or read_header(); confirm.
        expected = ['country', 'stateprovince']
        s = 'Header: %s, not as expected: %s' % (header, expected)
        self.assertEqual(header, expected, s)
    def test_term_setter_report(self):
        # Run term_setter_report() for three scenarios in turn, each writing
        # the same report file, and after each run check the report's header
        # and the listed field values in its first data row.
        print('testing term_setter_report')
        testsetterinputfile = self.framework.testsetterinputfile
        testsetterreportfile = self.framework.testsetterreportfile

        # Each scenario: (key, constantvalues, expected header,
        #                 ordered (field, expected value) checks)
        scenarios = (
            # Test field addition
            ('institutionCode',
             'CAS',
             ['ID', 'month', 'country', 'institutionCode'],
             (('institutionCode', 'CAS'),)),
            # Test field list addition
            ('institutionCode|license',
             'CAS|CC0',
             ['ID', 'month', 'country', 'institutionCode', 'license'],
             (('institutionCode', 'CAS'), ('license', 'CC0'))),
            # Test field replacement
            ('country',
             'Argentina',
             ['ID', 'month', 'country'],
             (('country', 'Argentina'),)),
        )

        for key, constants, wantedheader, fieldchecks in scenarios:
            result = term_setter_report(testsetterinputfile,
                                        testsetterreportfile,
                                        key,
                                        constantvalues=constants)
            s = ('term_setter_report() result not True '
                 'with inputfile: %s '
                 'and outputfile: %s') % (testsetterinputfile,
                                          testsetterreportfile)
            self.assertTrue(result, s)

            outputheader = read_header(testsetterreportfile)
            s = 'outputheader: %s not as expected: %s' % (outputheader,
                                                          wantedheader)
            self.assertEqual(outputheader, wantedheader, s)

            # Fetch just the first data row of the freshly written report
            dialect = csv_file_dialect(testsetterreportfile)
            encoding = csv_file_encoding(testsetterreportfile)
            firstrow = read_rows(testsetterreportfile,
                                 1,
                                 dialect=dialect,
                                 encoding=encoding,
                                 header=True,
                                 fieldnames=outputheader)[0]

            for field, expected in fieldchecks:
                value = firstrow[field]
                s = 'Field %s value %s not as expected (%s)' % (
                    field, value, expected)
                self.assertEqual(value, expected, s)