예제 #1
0
 def test_01_read_01(self):
     """Load the gold standard CSV by path and check its columns.

     Columns are looked up both by position and by header; every lookup is
     compared against a ``(header, values)`` tuple.
     """
     sheet = ClCsv(self.gold_csv.name)
     issn1_column = ('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])

     # Position 1 and the header 'issn1' must yield the same column.
     assert sheet.get_column(1) == issn1_column
     assert sheet.get_column('issn1') == issn1_column

     # Column 0 has an empty header.
     label_column = ('', ['q1', 'q2', 'q3', 'q4'])
     assert sheet.get_column(0) == label_column

     # Every value in this column is expected back as a string.
     issn4_column = ('issn4', ['19', '', 'i4a3', 'i4a4'])
     assert sheet.get_column('issn4') == issn4_column
예제 #2
0
 def test_08_read_unicode(self):
     """Load the non-ASCII gold standard CSV by path and check its columns."""
     sheet = ClCsv(self.gold_csv_unicode.name)
     first_data_column = ('иссн1', ['ила1', 'ила2', 'ила3', 'ила4'])

     # The same column must be reachable by position and by header.
     assert sheet.get_column(1) == first_data_column
     assert sheet.get_column('иссн1') == first_data_column

     # Column 0 has an empty header.
     label_column = ('', ['в1', 'в2', 'в3', 'в4'])
     assert sheet.get_column(0) == label_column
예제 #3
0
    def test_06_gets(self):
        """Check the column/row number lookups on the gold standard CSV.

        A known label is expected to map to its index, and an unknown label
        is expected to give ``None``.
        """
        rd_csv = ClCsv(self.gold_csv.name)

        assert rd_csv.get_colnumber('issn3') == 3
        # Compare to None by identity; ``== None`` can be satisfied by any
        # object with a permissive __eq__.
        assert rd_csv.get_colnumber('pineapple') is None

        assert rd_csv.get_rownumber('q4') == 4
        assert rd_csv.get_rownumber('nothing') is None
예제 #4
0
def make_csv(path, reapps):
    """Write the given reapplications to a CSV sheet, one column per ISSN.

    Each record is keyed by its print ISSN, falling back to the electronic
    ISSN; records with neither are skipped.  If two records resolve to the
    same ISSN the later one replaces the earlier.  The first column (empty
    header) holds the questions taken from the first ISSN in sorted order;
    each following column is headed by an ISSN and holds that record's
    answers.

    :param path: handed to ``ClCsv`` as the sheet to write
    :param reapps: iterable of ``models.Suggestion`` objects
    """
    cols = {}
    for r in reapps:
        assert isinstance(r, models.Suggestion)  # for pycharm type inspection
        bj = r.bibjson()
        issn = bj.get_one_identifier(idtype=bj.P_ISSN)
        if issn is None:
            issn = bj.get_one_identifier(idtype=bj.E_ISSN)
        if issn is None:
            continue

        # Iterated below as (question, answer) pairs.
        kvs = Suggestion2QuestionXwalk.suggestion2question(r)
        cols[issn] = kvs

    # sorted() returns a list on Python 2 and 3 alike; calling .sort() on
    # dict.keys() fails on Python 3, where keys() is a view.
    issns = sorted(cols)

    sheet = ClCsv(path)

    qs = None
    for i in issns:
        if qs is None:
            # The question column is written once, from the first record.
            qs = [q for q, _ in cols[i]]
            sheet.set_column("", qs)
        vs = [v for _, v in cols[i]]
        sheet.set_column(i, vs)

    sheet.save()
예제 #5
0
def open_csv(path):
    """Open ``path`` as a ``ClCsv`` sheet.

    :param path: handed straight to ``ClCsv``
    :return: the ``ClCsv`` instance
    :raises CsvValidationException: if constructing the sheet raises any
        ordinary exception
    """
    try:
        sheet = ClCsv(path)
    except Exception:
        # A bare ``except:`` would also trap KeyboardInterrupt and SystemExit
        # and misreport them as a corrupt file; let those propagate.
        raise CsvValidationException(
            "Unable to open CSV - may be corrupt or a different file format.")
    return sheet
예제 #6
0
 def _wrong_questions(self):
     """Save "wrong_questions.csv" with placeholder question labels.

     The question column holds "Q0" .. "Q55" instead of a real question
     list.  The single answer column is a copy of ``APPLICATION_COL`` with
     entries 0, 3 and 4 overridden, and is headed by its entry 3.
     """
     wrong_sheet = ClCsv("wrong_questions.csv")

     placeholder_questions = []
     for n in range(56):
         placeholder_questions.append("Q" + str(n))
     wrong_sheet.set_column("", placeholder_questions)

     answers = deepcopy(APPLICATION_COL)
     overrides = (
         (0, "First Title"),
         (3, "1234-5678"),
         (4, "9876-5432"),
     )
     for position, value in overrides:
         answers[position] = value
     wrong_sheet.set_column(answers[3], answers)

     wrong_sheet.save()
예제 #7
0
    def test_07_write_unicode(self):
        """Write a sheet with non-ASCII content and compare it to the
        pre-built unicode gold file, line by line (binary mode)."""
        wr_csv = ClCsv(self.PRFX + 'test_write_csv')
        wr_csv.set_column(u'', [u'в1', u'в2', u'в3', u'в4'])
        wr_csv.set_column(u'иссн1', [u'ила1', u'ила2', u'ила3', u'ила4'])
        wr_csv.save()

        # Context managers close both handles even when a read fails;
        # the bare open(...).readlines() form left them to the GC.
        with open(wr_csv.file_object.name, 'rb') as written:
            wr_lines = written.readlines()
        with open(self.gold_csv_unicode.name, 'rb') as gold:
            gold_lines_unicode = gold.readlines()
        assert gold_lines_unicode == wr_lines
예제 #8
0
def make_csv(path, reapps):
    """Write the given reapplications to a CSV sheet, one column per ISSN.

    Each record is keyed by its print ISSN, falling back to the electronic
    ISSN; records with neither are skipped.  If two records resolve to the
    same ISSN the later one replaces the earlier.  The first column (empty
    header) holds the questions taken from the first ISSN in sorted order;
    each following column is headed by an ISSN and holds that record's
    answers.

    :param path: handed to ``ClCsv`` as the sheet to write
    :param reapps: iterable of ``models.Suggestion`` objects
    """
    cols = {}
    for r in reapps:
        assert isinstance(r, models.Suggestion) # for pycharm type inspection
        bj = r.bibjson()
        issn = bj.get_one_identifier(idtype=bj.P_ISSN)
        if issn is None:
            issn = bj.get_one_identifier(idtype=bj.E_ISSN)
        if issn is None:
            continue

        # Iterated below as (question, answer) pairs.
        kvs = Suggestion2QuestionXwalk.suggestion2question(r)
        cols[issn] = kvs

    # sorted() returns a list on Python 2 and 3 alike; calling .sort() on
    # dict.keys() fails on Python 3, where keys() is a view.
    issns = sorted(cols)

    sheet = ClCsv(path)

    qs = None
    for i in issns:
        if qs is None:
            # The question column is written once, from the first record.
            qs = [q for q, _ in cols[i]]
            sheet.set_column("", qs)
        vs = [v for _, v in cols[i]]
        sheet.set_column(i, vs)

    sheet.save()
예제 #9
0
    def _wrong_issn(self):
        """Save "wrong_issn.csv": the real question list plus one answer column.

        The answer column is a copy of ``APPLICATION_COL`` with entries 0, 3
        and 4 overridden, and is headed by its entry 3.
        """
        issn_sheet = ClCsv("wrong_issn.csv")

        # first column is the questions
        questions = reapplication.Suggestion2QuestionXwalk.question_list()
        issn_sheet.set_column("", questions)

        answers = deepcopy(APPLICATION_COL)
        overrides = (
            (0, "First Title"),
            (3, "6754-0000"),
            (4, "8776-0998"),
        )
        for position, value in overrides:
            answers[position] = value
        issn_sheet.set_column(answers[3], answers)

        issn_sheet.save()
예제 #10
0
 def _wrong_questions(self):
     """Save "wrong_questions.csv" with placeholder question labels.

     The question column holds "Q0" .. "Q55" instead of a real question
     list.  The single answer column is a copy of ``APPLICATION_COL`` with
     entries 0, 3 and 4 overridden, and is headed by its entry 3.
     """
     wrong_sheet = ClCsv("wrong_questions.csv")

     placeholder_questions = []
     for n in range(56):
         placeholder_questions.append("Q" + str(n))
     wrong_sheet.set_column("", placeholder_questions)

     answers = deepcopy(APPLICATION_COL)
     overrides = (
         (0, "First Title"),
         (3, "1234-5678"),
         (4, "9876-5432"),
     )
     for position, value in overrides:
         answers[position] = value
     wrong_sheet.set_column(answers[3], answers)

     wrong_sheet.save()
예제 #11
0
    def test_07_write_unicode(self):
        """Write a sheet with non-ASCII content and compare it to the
        pre-built unicode gold file, line by line (binary mode)."""
        wr_csv = ClCsv(self.PRFX + 'test_write_csv')
        wr_csv.set_column(u'', [u'в1', u'в2', u'в3', u'в4'])
        wr_csv.set_column(u'иссн1', [u'ила1', u'ила2', u'ила3', u'ила4'])
        wr_csv.save()

        # Context managers close both handles even when a read fails;
        # the bare open(...).readlines() form left them to the GC.
        with open(wr_csv.file_object.name, 'rb') as written:
            wr_lines = written.readlines()
        with open(self.gold_csv_unicode.name, 'rb') as gold:
            gold_lines_unicode = gold.readlines()
        assert gold_lines_unicode == wr_lines
예제 #12
0
    def test_06_gets(self):
        """Check the column/row number lookups on the gold standard CSV.

        A known label is expected to map to its index, and an unknown label
        is expected to give ``None``.
        """
        rd_csv = ClCsv(self.gold_csv.name)

        assert rd_csv.get_colnumber('issn3') == 3
        # Compare to None by identity; ``== None`` can be satisfied by any
        # object with a permissive __eq__.
        assert rd_csv.get_colnumber('pineapple') is None

        assert rd_csv.get_rownumber('q4') == 4
        assert rd_csv.get_rownumber('nothing') is None
예제 #13
0
 def test_01_read_01(self):
     """Load the gold standard CSV by path and check its columns.

     Columns are looked up both by position and by header; every lookup is
     compared against a ``(header, values)`` tuple.
     """
     sheet = ClCsv(self.gold_csv.name)
     issn1_column = ('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])

     # Position 1 and the header 'issn1' must yield the same column.
     assert sheet.get_column(1) == issn1_column
     assert sheet.get_column('issn1') == issn1_column

     # Column 0 has an empty header.
     label_column = ('', ['q1', 'q2', 'q3', 'q4'])
     assert sheet.get_column(0) == label_column

     # Every value in this column is expected back as a string.
     issn4_column = ('issn4', ['19', '', 'i4a3', 'i4a4'])
     assert sheet.get_column('issn4') == issn4_column
예제 #14
0
 def test_03_read_03(self):
     """Build a ClCsv from a file object that is already closed."""
     issn3_column = ('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])

     # Precondition: the fixture handle is closed before it is handed over.
     assert self.gold_csv.closed

     sheet = ClCsv(self.gold_csv)
     assert sheet.get_column(3) == issn3_column
예제 #15
0
 def test_02_read_02(self):
     """Build a ClCsv from an open file object rather than a path
     (a different form of CSV creation)."""
     # The with-block closes the handle this test opened, even when the
     # assertion fails; previously it was never closed.
     with open(self.gold_csv.name, 'rb') as f:
         clcsv = ClCsv(f)
         assert clcsv.get_column(3) == ('issn3',
                                        ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
예제 #16
0
 def test_08_read_unicode(self):
     """Load the non-ASCII gold standard CSV by path and check its columns."""
     sheet = ClCsv(self.gold_csv_unicode.name)
     first_data_column = (u'иссн1', [u'ила1', u'ила2', u'ила3', u'ила4'])

     # The same column must be reachable by position and by header.
     assert sheet.get_column(1) == first_data_column
     assert sheet.get_column(u'иссн1') == first_data_column

     # Column 0 has an empty header.
     label_column = (u'', [u'в1', u'в2', u'в3', u'в4'])
     assert sheet.get_column(0) == label_column
예제 #17
0
    def _invalid_content(self):
        """Save "invalid.csv": the question list plus three answer columns.

        Each answer column is a copy of ``APPLICATION_COL`` with a few
        entries overridden, and is headed by its entry 3.  The first column
        puts free text in entry 1, the second blanks entry 11, and the
        third only changes entries 0, 3 and 4.
        """
        bad_sheet = ClCsv("invalid.csv")

        # first column is the questions
        questions = reapplication.Suggestion2QuestionXwalk.question_list()
        bad_sheet.set_column("", questions)

        # (position, value) overrides for each column, applied in order
        column_overrides = (
            (
                (0, "First Title"),
                (1, "This isn't a URL (but it should be)"),
                (3, "1234-5678"),
                (4, "9876-5432"),
            ),
            (
                (0, "Second Title"),
                (3, "2345-6789"),
                (4, "8765-4321"),
                (11, ""),  # This field is required (country)
            ),
            (
                (0, "Third Title"),
                (3, "3456-7890"),
                (4, "7654-3210"),
            ),
        )
        for overrides in column_overrides:
            answers = deepcopy(APPLICATION_COL)
            for position, value in overrides:
                answers[position] = value
            bad_sheet.set_column(answers[3], answers)

        bad_sheet.save()
예제 #18
0
 def test_02_read_02(self):
     """Build a ClCsv from an open file object rather than a path
     (a different form of CSV creation)."""
     # The with-block closes the handle this test opened, even when the
     # assertion fails; previously it was never closed.
     with open(self.gold_csv.name, 'rb') as f:
         clcsv = ClCsv(f)
         assert clcsv.get_column(3) == ('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
예제 #19
0
    def _invalid_content(self):
        """Save "invalid.csv": the question list plus many answer columns.

        Four hand-written columns come first, each a copy of
        ``APPLICATION_COL`` with a few entries overridden.  They are followed
        by generated columns numbered 5 to 79, each with "invalid url" in
        entry 54.  Every column is headed by its entry 3.
        """
        bad_sheet = ClCsv("invalid.csv")

        # first column is the questions
        questions = reapplication.Suggestion2QuestionXwalk.question_list()
        bad_sheet.set_column("", questions)

        # (position, value) overrides for each hand-written column, in order
        hand_written = (
            (
                (0, "First Title"),
                (1, "This isn't a URL (but it should be)"),
                (3, "1234-5678"),
                (4, "9876-5432"),
            ),
            (
                (0, "Second Title"),
                (3, "2345-6789"),
                (4, "8765-4321"),
                (11, ""),  # This field is required (country)
            ),
            (
                (0, "Third Title"),
                (3, "3456-7890"),
                (4, "7654-3210"),
            ),
            (
                (0, "Fourth Title"),
                (3, "4567-8901"),
                (4, "6543-2109"),
                (54, "invalid url"),
            ),
        )
        for overrides in hand_written:
            answers = deepcopy(APPLICATION_COL)
            for position, value in overrides:
                answers[position] = value
            bad_sheet.set_column(answers[3], answers)

        # Columns 1-4 are defined already.  Test 16 needs roughly 26 * 3
        # columns so that one picked from the middle is comfortably past the
        # 27th column; that is about 78, so go up to 79.
        for number in range(5, 80):
            label = str(number)
            # Single-digit numbers get an extra "0" in the prefix.
            prefix = "6529-540" if len(label) == 1 else "6529-54"
            filler = deepcopy(APPLICATION_COL)
            filler[0] = label + " Title"
            filler[3] = prefix + label
            filler[54] = "invalid url"
            bad_sheet.set_column(filler[3], filler)

        bad_sheet.save()
예제 #20
0
    def test_04_write_01(self):
        """Write a sheet column by column and compare the saved file to the
        pre-built gold file, line by line (binary mode)."""
        wr_csv = ClCsv(self.PRFX + 'test_write_csv')
        wr_csv.set_column('', ['q1', 'q2', 'q3', 'q4'])
        wr_csv.set_column('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])
        wr_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'i2a4'])
        wr_csv.set_column('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
        # This column deliberately includes non-string cells (an int and None).
        wr_csv.set_column('issn4', [19, None, 'i4a3', 'i4a4'])
        wr_csv.save()

        # Context managers close both handles even when a read fails;
        # the bare open(...).readlines() form left them to the GC.
        with open(wr_csv.file_object.name, 'rb') as written:
            wr_lines = written.readlines()
        with open(self.gold_csv.name, 'rb') as gold:
            gold_lines = gold.readlines()
        assert gold_lines == wr_lines
예제 #21
0
    def test_05_write_02(self):
        """Check we can overwrite an existing column.

        A sheet is saved with one wrong cell in 'issn2' and a fifth column
        headed 'issnX'.  It is then reopened by file name and corrected:
        'issn2' is replaced by header, and column 4 is replaced by index
        with a ``(header, values)`` tuple.
        """
        ow_csv = ClCsv(self.PRFX + 'test_overwrite_csv')
        ow_csv.set_column('', ['q1', 'q2', 'q3', 'q4'])
        ow_csv.set_column('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])
        ow_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'WRONG'])
        ow_csv.set_column('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
        ow_csv.set_column('issnX', ['iXa1', 'iXa2', 'iXa3', 'iXa4'])
        ow_csv.save()

        ow_csv = ClCsv(ow_csv.file_object.name)
        ow_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'i2a4'])
        ow_csv.set_column(4, ('issn4', [19, None, 'i4a3', 'i4a4']))
        ow_csv.save()

        # The changes above should make the file the same as our gold standard.
        # Context managers close both handles even when a read fails.
        with open(ow_csv.file_object.name, 'rb') as overwritten:
            ow_lines = overwritten.readlines()
        with open(self.gold_csv.name, 'rb') as gold:
            gold_lines = gold.readlines()
        assert gold_lines == ow_lines
예제 #22
0
    def test_04_write_01(self):
        """Write a sheet column by column and compare the saved file to the
        pre-built gold file, line by line (binary mode)."""
        wr_csv = ClCsv(self.PRFX + 'test_write_csv')
        wr_csv.set_column('', ['q1', 'q2', 'q3', 'q4'])
        wr_csv.set_column('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])
        wr_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'i2a4'])
        wr_csv.set_column('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
        # This column deliberately includes non-string cells (an int and None).
        wr_csv.set_column('issn4', [19, None, 'i4a3', 'i4a4'])
        wr_csv.save()

        # Context managers close both handles even when a read fails;
        # the bare open(...).readlines() form left them to the GC.
        with open(wr_csv.file_object.name, 'rb') as written:
            wr_lines = written.readlines()
        with open(self.gold_csv.name, 'rb') as gold:
            gold_lines = gold.readlines()
        assert gold_lines == wr_lines
예제 #23
0
        il = [s.strip() for s in i.split(',')]
        j = Journal.find_by_issn(il, in_doaj=False)
        if len(j) == 0:
            subjects_column.append('Error: not found')
        elif len(j) == 1:
            subj = j[0].bibjson().subjects()
            subjects_column.append(', '.join(
                [f"{s['scheme']}:{s['code']} - {s['term']}" for s in subj]))
        else:
            subjects_column.append(
                'Error: multiple records found for that ISSN')

    return subjects_column


if __name__ == '__main__':
    # Steps for issue 2337
    #
    # One-off script: read the ISSN column of an existing spreadsheet, look up
    # subject categories for those ISSNs, and save a copy of the spreadsheet
    # with the result added as a 'Subjects' column.  Both file paths are
    # hard-coded.

    # NOTE(review): the name ``csv`` would shadow the standard-library module
    # of the same name if this file imports it - confirm before reusing.
    csv = ClCsv(
        '/home/cloo/DOAJ_removed_22012020_SuspectedEditorialMisconduct.1.csv')
    # get_column is unpacked as a (heading, values) pair; ``heading`` is not
    # used again below.
    heading, issns = csv.get_column('ISSN')

    # presumably one result string per entry in ``issns`` - confirm against
    # lookup_subject_categories
    subjects = lookup_subject_categories(issns)
    newcsv = ClCsv(
        '/home/cloo/DOAJ_removed_22012020_SuspectedEditorialMisconduct.2.csv')
    # Deep copy, so the new sheet does not share data with the one just read.
    newcsv.data = deepcopy(csv.data)

    # Add subject data to new CSV as extra column
    newcsv.set_column('Subjects', subjects)
    newcsv.save()
예제 #24
0
 def test_03_read_03(self):
     """Build a ClCsv from a file object that is already closed."""
     issn3_column = ('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])

     # Precondition: the fixture handle is closed before it is handed over.
     assert self.gold_csv.closed

     sheet = ClCsv(self.gold_csv)
     assert sheet.get_column(3) == issn3_column
예제 #25
0
    def test_05_write_02(self):
        """Check we can overwrite an existing column.

        A sheet is saved with one wrong cell in 'issn2' and a fifth column
        headed 'issnX'.  It is then reopened by file name and corrected:
        'issn2' is replaced by header, and column 4 is replaced by index
        with a ``(header, values)`` tuple.
        """
        ow_csv = ClCsv(self.PRFX + 'test_overwrite_csv')
        ow_csv.set_column('', ['q1', 'q2', 'q3', 'q4'])
        ow_csv.set_column('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])
        ow_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'WRONG'])
        ow_csv.set_column('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
        ow_csv.set_column('issnX', ['iXa1', 'iXa2', 'iXa3', 'iXa4'])
        ow_csv.save()

        ow_csv = ClCsv(ow_csv.file_object.name)
        ow_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'i2a4'])
        ow_csv.set_column(4, ('issn4', [19, None, 'i4a3', 'i4a4']))
        ow_csv.save()

        # The changes above should make the file the same as our gold standard.
        # Context managers close both handles even when a read fails.
        with open(ow_csv.file_object.name, 'rb') as overwritten:
            ow_lines = overwritten.readlines()
        with open(self.gold_csv.name, 'rb') as gold:
            gold_lines = gold.readlines()
        assert gold_lines == ow_lines
예제 #26
0
    def _make_valid_csv(self):
        """Save "valid.csv": the question list plus three answer columns.

        Each answer column is a copy of ``APPLICATION_COL`` with entries 0,
        3 and 4 overridden, and is headed by its entry 3.
        """
        valid_sheet = ClCsv("valid.csv")

        # first column is the questions
        questions = reapplication.Suggestion2QuestionXwalk.question_list()
        valid_sheet.set_column("", questions)

        # values for entries 0, 3 and 4 of each test column, in write order
        fixtures = (
            ("First Title", "1234-5678", "9876-5432"),
            ("Second Title", "2345-6789", "8765-4321"),
            ("Third Title", "3456-7890", "7654-3210"),
        )
        for title, entry_3, entry_4 in fixtures:
            answers = deepcopy(APPLICATION_COL)
            answers[0] = title
            answers[3] = entry_3
            answers[4] = entry_4
            valid_sheet.set_column(answers[3], answers)

        valid_sheet.save()
예제 #27
0
    def _make_valid_csv(self):
        """Save "valid.csv": the question list plus three answer columns.

        Each answer column is a copy of ``APPLICATION_COL`` with entries 0,
        3 and 4 overridden, and is headed by its entry 3.
        """
        valid_sheet = ClCsv("valid.csv")

        # first column is the questions
        questions = reapplication.Suggestion2QuestionXwalk.question_list()
        valid_sheet.set_column("", questions)

        # values for entries 0, 3 and 4 of each test column, in write order
        fixtures = (
            ("First Title", "1234-5678", "9876-5432"),
            ("Second Title", "2345-6789", "8765-4321"),
            ("Third Title", "3456-7890", "7654-3210"),
        )
        for title, entry_3, entry_4 in fixtures:
            answers = deepcopy(APPLICATION_COL)
            answers[0] = title
            answers[3] = entry_3
            answers[4] = entry_4
            valid_sheet.set_column(answers[3], answers)

        valid_sheet.save()