def test_01_read_01(self):
    """Load the gold standard CSV by file name and check several columns.

    Columns are fetched both by position and by heading; each lookup is
    expected to yield a ``(heading, values)`` pair.
    """
    sheet = ClCsv(self.gold_csv.name)

    issn1_expected = ('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])
    questions_expected = ('', ['q1', 'q2', 'q3', 'q4'])
    issn4_expected = ('issn4', ['19', '', 'i4a3', 'i4a4'])

    # The same column is addressed first by index, then by heading.
    assert sheet.get_column(1) == issn1_expected
    assert sheet.get_column('issn1') == issn1_expected
    assert sheet.get_column(0) == questions_expected
    assert sheet.get_column('issn4') == issn4_expected
def test_08_read_unicode(self):
    """Load the non-ASCII gold standard CSV and check columns by index and name."""
    sheet = ClCsv(self.gold_csv_unicode.name)

    issn1_expected = ('иссн1', ['ила1', 'ила2', 'ила3', 'ила4'])
    questions_expected = ('', ['в1', 'в2', 'в3', 'в4'])

    # Position 1 and the heading 'иссн1' must resolve to the same column.
    assert sheet.get_column(1) == issn1_expected
    assert sheet.get_column('иссн1') == issn1_expected
    assert sheet.get_column(0) == questions_expected
def test_06_gets(self):
    """Check the column-number and row-number lookups, including misses.

    A heading or row label that is not present is expected to give ``None``.
    """
    rd_csv = ClCsv(self.gold_csv.name)

    assert rd_csv.get_colnumber('issn3') == 3
    # Identity comparison: ``None`` is a singleton, so ``is`` is the correct test.
    assert rd_csv.get_colnumber('pineapple') is None
    assert rd_csv.get_rownumber('q4') == 4
    assert rd_csv.get_rownumber('nothing') is None
def make_csv(path, reapps):
    """Write one CSV column per reapplication to the sheet at ``path``.

    Each entry of ``reapps`` is converted to question/answer pairs with
    ``Suggestion2QuestionXwalk.suggestion2question`` and keyed by its print
    ISSN, falling back to the electronic ISSN.  Entries with neither
    identifier are left out.  Columns are written in sorted ISSN order; the
    first (unnamed) column holds the question texts taken from the first
    ISSN's pairs.

    :param path: location handed to ``ClCsv``
    :param reapps: iterable of ``models.Suggestion`` objects
    """
    cols = {}
    for r in reapps:
        assert isinstance(r, models.Suggestion)  # for pycharm type inspection
        bj = r.bibjson()
        issn = bj.get_one_identifier(idtype=bj.P_ISSN)
        if issn is None:
            issn = bj.get_one_identifier(idtype=bj.E_ISSN)
        if issn is None:
            # No identifier to use as a column heading, so skip this one.
            continue
        cols[issn] = Suggestion2QuestionXwalk.suggestion2question(r)

    sheet = ClCsv(path)
    qs = None
    # sorted() works on Python 2 and 3 alike; dict.keys().sort() fails on
    # Python 3 because keys() returns a view rather than a list.
    for issn in sorted(cols):
        kvs = cols[issn]
        if qs is None:
            # The question column is written once, ahead of the first answer column.
            qs = [q for q, _ in kvs]
            sheet.set_column("", qs)
        sheet.set_column(issn, [v for _, v in kvs])

    sheet.save()
def open_csv(path):
    """Open ``path`` as a ``ClCsv`` sheet and return it.

    :param path: location handed to ``ClCsv``
    :return: the constructed ``ClCsv`` object
    :raises CsvValidationException: if constructing the sheet raises any
        ordinary exception
    """
    try:
        sheet = ClCsv(path)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # should propagate instead of being reported as a corrupt file.
        raise CsvValidationException(
            "Unable to open CSV - may be corrupt or a different file format.")
    return sheet
def _wrong_questions(self):
    """Write ``wrong_questions.csv``: generic question labels plus one application column."""
    sheet = ClCsv("wrong_questions.csv")

    # The question column is filled with the generic labels Q0 .. Q55.
    labels = ["Q{0}".format(n) for n in range(56)]
    sheet.set_column("", labels)

    application = deepcopy(APPLICATION_COL)
    for position, value in ((0, "First Title"), (3, "1234-5678"), (4, "9876-5432")):
        application[position] = value

    # The value at index 3 doubles as the column heading.
    sheet.set_column(application[3], application)
    sheet.save()
def test_07_write_unicode(self):
    """Write non-ASCII columns to a file and compare it with the pre-built gold file."""
    wr_csv = ClCsv(self.PRFX + 'test_write_csv')
    wr_csv.set_column(u'', [u'в1', u'в2', u'в3', u'в4'])
    wr_csv.set_column(u'иссн1', [u'ила1', u'ила2', u'ила3', u'ила4'])
    wr_csv.save()

    # Read both files as raw bytes; the context managers make sure the
    # handles are closed even when the comparison below fails.
    with open(wr_csv.file_object.name, 'rb') as written:
        wr_lines = written.readlines()
    with open(self.gold_csv_unicode.name, 'rb') as gold:
        gold_lines_unicode = gold.readlines()

    assert gold_lines_unicode == wr_lines
def _wrong_issn(self):
    """Write ``wrong_issn.csv``: the question list plus one application column."""
    sheet = ClCsv("wrong_issn.csv")

    # First column is the questions.
    sheet.set_column("", reapplication.Suggestion2QuestionXwalk.question_list())

    application = deepcopy(APPLICATION_COL)
    for position, value in ((0, "First Title"), (3, "6754-0000"), (4, "8776-0998")):
        application[position] = value

    # The value at index 3 doubles as the column heading.
    sheet.set_column(application[3], application)
    sheet.save()
def test_03_read_03(self):
    """Build a ClCsv from a file object that is already closed and read a column."""
    # Precondition for this test: the gold file handle has been closed.
    assert self.gold_csv.closed

    sheet = ClCsv(self.gold_csv)
    issn3_expected = ('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
    assert sheet.get_column(3) == issn3_expected
def test_02_read_02(self):
    """Build a ClCsv from an already-open file object and read a column.

    This is a different form of CSV creation from passing a file name.
    """
    # The context manager guarantees the handle is released when the test
    # ends, whether or not the assertion passes.  The assertion stays inside
    # the block so the sheet is used while the file is still open.
    with open(self.gold_csv.name, 'rb') as f:
        clcsv = ClCsv(f)
        assert clcsv.get_column(3) == ('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
def test_08_read_unicode(self):
    """Load the non-ASCII gold standard CSV and check columns by index and name."""
    sheet = ClCsv(self.gold_csv_unicode.name)

    issn1_expected = (u'иссн1', [u'ила1', u'ила2', u'ила3', u'ила4'])
    questions_expected = (u'', [u'в1', u'в2', u'в3', u'в4'])

    # Position 1 and the heading u'иссн1' must resolve to the same column.
    assert sheet.get_column(1) == issn1_expected
    assert sheet.get_column(u'иссн1') == issn1_expected
    assert sheet.get_column(0) == questions_expected
def _invalid_content(self):
    """Write ``invalid.csv``: the question list plus three application columns.

    Each column starts as a copy of ``APPLICATION_COL`` with a few positions
    overwritten; the first two columns carry deliberately bad values.
    """
    sheet = ClCsv("invalid.csv")

    # First column is the questions.
    sheet.set_column("", reapplication.Suggestion2QuestionXwalk.question_list())

    # Three columns of results for testing purposes, as (position, value) overrides.
    overrides_per_column = (
        (
            (0, "First Title"),
            (1, "This isn't a URL (but it should be)"),
            (3, "1234-5678"),
            (4, "9876-5432"),
        ),
        (
            (0, "Second Title"),
            (3, "2345-6789"),
            (4, "8765-4321"),
            (11, ""),  # This field is required (country)
        ),
        (
            (0, "Third Title"),
            (3, "3456-7890"),
            (4, "7654-3210"),
        ),
    )

    for overrides in overrides_per_column:
        column = deepcopy(APPLICATION_COL)
        for position, value in overrides:
            column[position] = value
        # The value at index 3 doubles as the column heading.
        sheet.set_column(column[3], column)

    sheet.save()
def _invalid_content(self):
    """Write ``invalid.csv``: the question list, four hand-made columns and 75 generated ones.

    Each column starts as a copy of ``APPLICATION_COL`` with a few positions
    overwritten; several of them carry deliberately bad values.
    """
    sheet = ClCsv("invalid.csv")

    # First column is the questions.
    sheet.set_column("", reapplication.Suggestion2QuestionXwalk.question_list())

    # Hand-made columns of results for testing purposes, as (position, value) overrides.
    overrides_per_column = (
        (
            (0, "First Title"),
            (1, "This isn't a URL (but it should be)"),
            (3, "1234-5678"),
            (4, "9876-5432"),
        ),
        (
            (0, "Second Title"),
            (3, "2345-6789"),
            (4, "8765-4321"),
            (11, ""),  # This field is required (country)
        ),
        (
            (0, "Third Title"),
            (3, "3456-7890"),
            (4, "7654-3210"),
        ),
        (
            (0, "Fourth Title"),
            (3, "4567-8901"),
            (4, "6543-2109"),
            (54, "invalid url"),
        ),
    )
    for overrides in overrides_per_column:
        column = deepcopy(APPLICATION_COL)
        for position, value in overrides:
            column[position] = value
        # The value at index 3 doubles as the column heading.
        sheet.set_column(column[3], column)

    # Columns 1-4 are defined already, and we need, say, 26 * 3 columns for
    # test 16 (so we can comfortably pick one from the middle and have it be
    # > 27th column), so roughly 78, so why not 79?
    for number in range(5, 80):
        label = str(number)
        filler = deepcopy(APPLICATION_COL)
        filler[0] = label + " Title"
        # Zero-pad single digits so the part after the hyphen always has four characters.
        filler[3] = "6529-54" + label.zfill(2)
        filler[54] = "invalid url"
        sheet.set_column(filler[3], filler)

    sheet.save()
def test_04_write_01(self):
    """Write a sheet to a file and compare it with the pre-built gold file."""
    wr_csv = ClCsv(self.PRFX + 'test_write_csv')
    wr_csv.set_column('', ['q1', 'q2', 'q3', 'q4'])
    wr_csv.set_column('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])
    wr_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'i2a4'])
    wr_csv.set_column('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
    # Non-string cell values (an int and None) are written on purpose.
    wr_csv.set_column('issn4', [19, None, 'i4a3', 'i4a4'])
    wr_csv.save()

    # Read both files as raw bytes; the context managers make sure the
    # handles are closed even when the comparison below fails.
    with open(wr_csv.file_object.name, 'rb') as written:
        wr_lines = written.readlines()
    with open(self.gold_csv.name, 'rb') as gold:
        gold_lines = gold.readlines()

    assert gold_lines == wr_lines
def test_05_write_02(self):
    """Check we can overwrite an existing column, by heading and by position."""
    ow_csv = ClCsv(self.PRFX + 'test_overwrite_csv')
    ow_csv.set_column('', ['q1', 'q2', 'q3', 'q4'])
    ow_csv.set_column('issn1', ['i1a1', 'i1a2', 'i1a3', 'i1a4'])
    ow_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'WRONG'])
    ow_csv.set_column('issn3', ['i3a1', 'i3a2', 'i3a3', 'i3a4'])
    ow_csv.set_column('issnX', ['iXa1', 'iXa2', 'iXa3', 'iXa4'])
    ow_csv.save()

    # Re-open the file just written and replace the two incorrect columns:
    # 'issn2' by heading, and position 4 with a (heading, values) pair.
    ow_csv = ClCsv(ow_csv.file_object.name)
    ow_csv.set_column('issn2', ['i2a1', 'i2a2', 'i2a3', 'i2a4'])
    ow_csv.set_column(4, ('issn4', [19, None, 'i4a3', 'i4a4']))
    ow_csv.save()

    # The changes above should make the file the same as our gold standard.
    # Context managers make sure the handles are closed even on failure.
    with open(ow_csv.file_object.name, 'rb') as overwritten:
        ow_lines = overwritten.readlines()
    with open(self.gold_csv.name, 'rb') as gold:
        gold_lines = gold.readlines()

    assert gold_lines == ow_lines
il = [s.strip() for s in i.split(',')] j = Journal.find_by_issn(il, in_doaj=False) if len(j) == 0: subjects_column.append('Error: not found') elif len(j) == 1: subj = j[0].bibjson().subjects() subjects_column.append(', '.join( [f"{s['scheme']}:{s['code']} - {s['term']}" for s in subj])) else: subjects_column.append( 'Error: multiple records found for that ISSN') return subjects_column if __name__ == '__main__': # Steps for issue 2337 csv = ClCsv( '/home/cloo/DOAJ_removed_22012020_SuspectedEditorialMisconduct.1.csv') heading, issns = csv.get_column('ISSN') subjects = lookup_subject_categories(issns) newcsv = ClCsv( '/home/cloo/DOAJ_removed_22012020_SuspectedEditorialMisconduct.2.csv') newcsv.data = deepcopy(csv.data) # Add subject data to new CSV as extra column newcsv.set_column('Subjects', subjects) newcsv.save()
def _make_valid_csv(self):
    """Write ``valid.csv``: the question list plus three application columns.

    Each column is a copy of ``APPLICATION_COL`` with positions 0, 3 and 4
    overwritten.
    """
    sheet = ClCsv("valid.csv")

    # First column is the questions.
    sheet.set_column("", reapplication.Suggestion2QuestionXwalk.question_list())

    # Three columns of results for testing purposes: values for positions 0, 3 and 4.
    columns = (
        ("First Title", "1234-5678", "9876-5432"),
        ("Second Title", "2345-6789", "8765-4321"),
        ("Third Title", "3456-7890", "7654-3210"),
    )
    for first, fourth, fifth in columns:
        column = deepcopy(APPLICATION_COL)
        column[0] = first
        column[3] = fourth
        column[4] = fifth
        # The value at index 3 doubles as the column heading.
        sheet.set_column(column[3], column)

    sheet.save()