def test_nonsense(self):
    """Feed nonsense input to the miner and expect no scholars back."""
    self.add_sss_scholars(self.test_nonsense, "N/A")

    miner = ScholarMiner(self.filename_prefix, self.scholars,
                         self.affiliations)
    self.miner = miner
    miner.process_group()
    self.scholars = miner.get_scholars()

    # TC1: DBLP must not return anything for the nonsense input
    nbr_found = len(self.scholars)
    assert nbr_found == 0
def test_mauro_caporuscio(self):
    """Mine "Mauro Caporuscio" and check the miner, analyzer and tabulator.

    The expectations are lower bounds (>=), not exact values.
    """
    self.add_sss_scholars(self.test_nonascii_scholar, "N/A")
    self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                              self.affiliations)
    # This involves dealing with non-ASCII characters
    self.miner.parse_scholars()
    self.scholars = self.miner.get_scholars()

    # TC1: Test that DBLP returns a result
    # The None check must come before anything iterates the list.
    assert self.scholars is not None
    assert len(self.scholars) == 1
    mauro = next((scholar for scholar in self.scholars
                  if scholar.name == "Mauro Caporuscio"), None)
    # Fail with a readable message instead of an AttributeError on None.
    assert mauro is not None, "Mauro Caporuscio not among the mined scholars"

    # TC2: Test that Mauro has at least 40 DBLP entries
    assert mauro.dblp_entries >= 40

    # TC3: Test that the name is correctly processed
    assert mauro.name == "Mauro Caporuscio"

    # TC4: Test that Mauro has at least 30 publications after cleaning the list
    assert mauro.nbr_publications >= 30

    # TC5: Test that Mauro has non-zero ratios
    assert mauro.first_ratio >= 0.01
    assert mauro.sci_ratio >= 0.01
    assert mauro.nbr_sci_publications >= 1

    # TC6: Test write results
    self.miner.write_results()
    filename_txt = self.filename_prefix + "1_miner.txt"
    filename_csv = self.filename_prefix + "1_miner.csv"
    assert os.path.exists(filename_txt)
    assert os.path.exists(filename_csv)

    # TC7: Test file sizes
    file_stats_txt = os.stat(filename_txt)
    file_stats_csv = os.stat(filename_csv)
    assert file_stats_txt.st_size > 0
    assert file_stats_csv.st_size > 0

    # TC8: Test analyzer
    analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                               self.affiliations)
    analyzer.analyze_individual_research_interests()
    assert mauro.sss_contrib >= 1.50
    assert mauro.sss_rating >= 1.00

    # TC10: Test tabulator
    tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                 self.affiliations)
    tabulator.write_tables()
def test_david_notkin(self):
    """Mine "David Notkin" and check the miner, analyzer and tabulator.

    The expectations are exact reference values for this scholar.
    """
    self.add_sss_scholars(self.test_scholar, "N/A")
    self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                              self.affiliations)
    self.miner.process_group()
    self.scholars = self.miner.get_scholars()

    # TC1: Test that DBLP returns a result
    # The None check must come before anything iterates the list.
    assert self.scholars is not None
    assert len(self.scholars) == 1
    david = next((scholar for scholar in self.scholars
                  if scholar.name == "David Notkin"), None)
    # Fail with a readable message instead of an AttributeError on None.
    assert david is not None, "David Notkin not among the mined scholars"

    # TC2: Test that David Notkin has 152 DBLP entries
    assert david.dblp_entries == 152

    # TC3: Test that the name is correctly processed
    assert david.name == "David Notkin"

    # TC4: Test that David Notkin has 134 publications after cleaning the list
    assert david.nbr_publications == 134

    # TC5: Test that David Notkin has the correct ratios
    assert david.first_ratio == pytest.approx(0.24, 0.01)
    assert david.sci_ratio == pytest.approx(0.16, 0.01)
    assert david.nbr_sci_publications == 22

    # TC6: Test write results
    self.miner.write_results()
    filename_txt = self.filename_prefix + "1_miner.txt"
    filename_csv = self.filename_prefix + "1_miner.csv"
    assert os.path.exists(filename_txt)
    assert os.path.exists(filename_csv)

    # TC7: Test file sizes
    # NOTE(review): the second positional argument of pytest.approx is the
    # relative tolerance, so 1 accepts any size from 0 up to twice the
    # expected value - confirm whether abs=1 was intended.
    file_stats_txt = os.stat(filename_txt)
    file_stats_csv = os.stat(filename_csv)
    assert file_stats_txt.st_size == pytest.approx(1149, 1)
    assert file_stats_csv.st_size == pytest.approx(67, 1)

    # TC8: Test analyzer
    analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                               self.affiliations)
    analyzer.analyze_individual_research_interests()
    assert david.sss_contrib == 6.02
    assert david.sss_rating == 24.12

    # TC10: Test tabulator
    tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                 self.affiliations)
    tabulator.write_tables()
def test_thomas_olsson(self):
    """Mine "Thomas Olsson" and check the miner, analyzer and tabulator.

    The expectations are lower bounds (>=), not exact values.
    """
    self.add_sss_scholars(self.test_scholar, "N/A")
    self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                              self.affiliations)
    self.miner.parse_scholars()
    self.scholars = self.miner.get_scholars()

    # TC1: Test that DBLP returns a result
    # The None check must come before anything iterates the list.
    assert self.scholars is not None
    assert len(self.scholars) == 1
    thomas = next((scholar for scholar in self.scholars
                   if scholar.name == "Thomas Olsson"), None)
    # Fail with a readable message instead of an AttributeError on None.
    assert thomas is not None, "Thomas Olsson not among the mined scholars"

    # TC2: Test that Thomas Olsson has at least 40 DBLP entries
    assert thomas.dblp_entries >= 40

    # TC3: Test that the name is correctly processed
    assert thomas.name == "Thomas Olsson"

    # TC4: Test that Thomas Olsson has at least 30 publications after cleaning the list
    assert thomas.nbr_publications >= 30

    # TC5: Test that Thomas Olsson has non-zero ratios
    assert thomas.first_ratio >= 0.01
    assert thomas.sci_ratio >= 0.01
    assert thomas.nbr_sci_publications >= 1

    # TC6: Test write results
    self.miner.write_results()
    filename_txt = self.filename_prefix + "1_miner.txt"
    filename_csv = self.filename_prefix + "1_miner.csv"
    assert os.path.exists(filename_txt)
    assert os.path.exists(filename_csv)

    # TC7: Test file sizes
    file_stats_txt = os.stat(filename_txt)
    file_stats_csv = os.stat(filename_csv)
    assert file_stats_txt.st_size > 0
    assert file_stats_csv.st_size > 0

    # TC8: Test analyzer
    analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                               self.affiliations)
    analyzer.analyze_individual_research_interests()
    assert thomas.sss_contrib >= 1.50
    assert thomas.sss_rating >= 1.00

    # TC10: Test tabulator
    tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                 self.affiliations)
    tabulator.write_tables()
def test_simon_poulding(self):
    """Mine both test scholars and verify the figures for Simon M. Poulding."""
    self.add_sss_scholars(self.test_scholars, "N/A")
    miner = ScholarMiner(self.filename_prefix, self.scholars,
                         self.affiliations)
    self.miner = miner
    miner.parse_scholars()
    self.scholars = miner.get_scholars()

    # Keep the last entry carrying the expected name, None if there is none.
    hits = [entry for entry in self.scholars
            if entry.name == "Simon M. Poulding"]
    simon = hits[-1] if hits else None

    # TC1: exactly one scholar is left after mining
    assert len(self.scholars) == 1

    # TC2: 48 DBLP entries
    assert simon.dblp_entries == 48

    # TC3: the name is processed correctly
    assert simon.name == "Simon M. Poulding"

    # TC4: 41 publications remain after cleaning the list
    assert simon.nbr_publications == 41

    # TC5: ratios and SCI publication count
    assert simon.first_ratio == pytest.approx(0.37, 0.01)
    assert simon.sci_ratio == pytest.approx(0.17, 0.01)
    assert simon.nbr_sci_publications == 7

    # TC6: writing the results produces both output files
    self.miner.write_results()
    txt_path = self.filename_prefix + "1_miner.txt"
    csv_path = self.filename_prefix + "1_miner.csv"
    assert os.path.exists(txt_path)
    assert os.path.exists(csv_path)

    # TC7: file sizes
    txt_info = os.stat(txt_path)
    csv_info = os.stat(csv_path)
    assert txt_info.st_size == pytest.approx(476, 1)
    assert csv_info.st_size == pytest.approx(139, 1)

    # TC8: analyzer results
    ScholarAnalyzer(self.filename_prefix, self.scholars,
                    self.affiliations).analyze_individual_research_interests()
    assert simon.sss_contrib == 2.84
    assert simon.sss_rating == 8.5

    # TC10: tabulator runs
    ScholarTabulator(self.filename_prefix, self.scholars,
                     self.affiliations).write_tables()
def test_richard_holst(self):
    """Mine both test scholars and verify that Richard C. Holt is dropped."""
    self.add_sss_scholars(self.test_scholars, "N/A")
    miner = ScholarMiner(self.filename_prefix, self.scholars,
                         self.affiliations)
    self.miner = miner
    miner.parse_scholars()
    self.scholars = miner.get_scholars()

    # Keep the last entry carrying the expected name, None if there is none.
    hits = [entry for entry in self.scholars
            if entry.name == "Richard C. Holt"]
    richard = hits[-1] if hits else None

    # TC1: Richard is removed as a non-SCI first-author
    assert richard is None
class TestClass_NonSense:
    """Checks that nonsense input yields no scholars."""

    def setup_method(self):
        """Reset the per-test state and define the nonsense input entry."""
        today = str(date.today())
        self.filename_prefix = today + "_swese_"
        self.affiliations = []
        self.scholars = []
        self.test_nonsense = [("ABCDEFGH", "ijklmno", "A¿£$‰")]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every (name, running number, URL) entry.

        The pid is the part of the URL between "pid/" and ".xml". When a URL
        has no "pid/" part, a message is printed and processing stops.
        """
        for person in process_list:
            name, running_number, url = person[0], person[1], person[2]

            # cut the pid out of the url
            try:
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            self.scholars.append(
                SSSScholar(name, running_number, pid, url, affiliation, -1))

            candidate = SSSAffiliation(affiliation)
            if candidate in self.affiliations:
                # already registered: bump the stored object's counter
                known = next(
                    (aff for aff in self.affiliations
                     if affiliation == aff.name), None)
                known.nbr_scholars += 1
            else:
                candidate.nbr_scholars += 1
                self.affiliations.append(candidate)

    def test_nonsense(self):
        """Feed the nonsense entry to the miner and expect no scholars back."""
        self.add_sss_scholars(self.test_nonsense, "N/A")
        miner = ScholarMiner(self.filename_prefix, self.scholars,
                             self.affiliations)
        self.miner = miner
        miner.parse_scholars()
        self.scholars = miner.get_scholars()

        # TC1: DBLP must not return anything for the nonsense input
        nbr_found = len(self.scholars)
        assert nbr_found == 0
class TestClass_NonSense:
    """Checks that nonsense author names yield no scholars."""

    def setup_method(self):
        """Reset the per-test state and define the nonsense input names."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_nonsense = ["ABCDEFGH", "A¿£$‰"]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every author name in process_list.

        A name whose last whitespace-separated word is all digits is split
        into the name proper and that running number; every other name gets
        the running number -1.

        Args:
            process_list: iterable of author name strings.
            affiliation: affiliation name assigned to every scholar added.
        """
        for name in process_list:
            words = name.split()
            if words[-1].isdigit():
                # author has a running number: strip it from the name
                scholar = SSSScholar(" ".join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)
            self._count_affiliation(affiliation)

    def _count_affiliation(self, affiliation):
        """Count one more scholar for affiliation, registering it on first use."""
        tmp_aff = SSSAffiliation(affiliation)
        # Membership relies on SSSAffiliation's equality, defined elsewhere.
        if tmp_aff not in self.affiliations:
            tmp_aff.nbr_scholars += 1
            # Affiliations go into self.affiliations only; self.scholars
            # holds SSSScholar objects.
            self.affiliations.append(tmp_aff)
        else:
            curr = next(
                (x for x in self.affiliations if affiliation == x.name), None)
            curr.nbr_scholars += 1

    def test_nonsense(self):
        """Feed the nonsense names to the miner and expect no scholars back."""
        self.add_sss_scholars(self.test_nonsense, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()

        # TC1: Test that DBLP does not return anything
        assert len(self.scholars) == 0
else: custom_list = [] custom_list.append(sys.argv[1]) add_sss_scholars(custom_list, "N/A") # Prepare the process subdirectory = "output" try: os.mkdir(subdirectory) except Exception: pass filename_prefix = os.path.join(subdirectory, str(date.today()) + "_sss_") # 1. Mine the scholars, write the results print("####### Step 1 - Mining scholars #######") miner = ScholarMiner(filename_prefix, sss_scholars, sss_affiliations) miner.process_group() miner.write_results() sss_scholars = miner.get_scholars() # 2. Analyze the scholars, remove affiliations with no first-authored SCI publications, write the results print("\n####### Step 2 - Analyzing scholars #######") for scholar in sss_scholars: curr = next((x for x in sss_affiliations if scholar.affiliation == x.name), None) curr.nbr_first_sci += scholar.nbr_first_sci tmp_affiliations = [] for affiliation in sss_affiliations: # keep only affiliations with SSS scholars if affiliation.nbr_first_sci > 0: tmp_affiliations.append(affiliation)
class TestClass_NonASCIITitles:
    """End-to-end checks for a scholar whose mining involves non-ASCII characters."""

    def setup_method(self):
        """Reset the per-test state and define the scholar to mine."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_nonascii_scholar = [
            ("Mauro Caporuscio", "-1",
             "https://dblp.org/pid/c/MauroCaporuscio.xml")
        ]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every (name, running number, URL) entry.

        The pid is the part of the URL between "pid/" and ".xml". When a URL
        has no "pid/" part, a message is printed and processing stops.

        Args:
            process_list: sequence of (name, running_number, url) tuples.
            affiliation: affiliation name assigned to every scholar added.
        """
        for person in process_list:
            name = person[0]
            running_number = person[1]
            url = person[2]

            # extract the pid from the url by substringing
            try:
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            self.scholars.append(
                SSSScholar(name, running_number, pid, url, affiliation, -1))

            tmp_aff = SSSAffiliation(affiliation)
            # Membership relies on SSSAffiliation's equality, defined elsewhere.
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                curr = next(
                    (x for x in self.affiliations if affiliation == x.name),
                    None)
                curr.nbr_scholars += 1

    def test_mauro_caporuscio(self):
        """Mine "Mauro Caporuscio" and check the miner, analyzer and tabulator.

        The expectations are lower bounds (>=), not exact values.
        """
        self.add_sss_scholars(self.test_nonascii_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        # This involves dealing with non-ASCII characters
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()

        # TC1: Test that DBLP returns a result
        # The None check must come before anything iterates the list.
        assert self.scholars is not None
        assert len(self.scholars) == 1
        mauro = next((scholar for scholar in self.scholars
                      if scholar.name == "Mauro Caporuscio"), None)
        # Fail with a readable message instead of an AttributeError on None.
        assert mauro is not None, "Mauro Caporuscio not among the mined scholars"

        # TC2: Test that Mauro has at least 40 DBLP entries
        assert mauro.dblp_entries >= 40

        # TC3: Test that the name is correctly processed
        assert mauro.name == "Mauro Caporuscio"

        # TC4: Test that Mauro has at least 30 publications after cleaning the list
        assert mauro.nbr_publications >= 30

        # TC5: Test that Mauro has non-zero ratios
        assert mauro.first_ratio >= 0.01
        assert mauro.sci_ratio >= 0.01
        assert mauro.nbr_sci_publications >= 1

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size > 0
        assert file_stats_csv.st_size > 0

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert mauro.sss_contrib >= 1.50
        assert mauro.sss_rating >= 1.00

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
class TestClass_OneScholar:
    """End-to-end checks for a single scholar given by name."""

    def setup_method(self):
        """Reset the per-test state and define the scholar to mine."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholar = ["David Notkin"]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every author name in process_list.

        A name whose last whitespace-separated word is all digits is split
        into the name proper and that running number; every other name gets
        the running number -1.

        Args:
            process_list: iterable of author name strings.
            affiliation: affiliation name assigned to every scholar added.
        """
        for name in process_list:
            words = name.split()
            if words[-1].isdigit():
                # author has a running number: strip it from the name
                scholar = SSSScholar(" ".join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)
            self._count_affiliation(affiliation)

    def _count_affiliation(self, affiliation):
        """Count one more scholar for affiliation, registering it on first use."""
        tmp_aff = SSSAffiliation(affiliation)
        # Membership relies on SSSAffiliation's equality, defined elsewhere.
        if tmp_aff not in self.affiliations:
            tmp_aff.nbr_scholars += 1
            # Affiliations go into self.affiliations only; self.scholars
            # holds SSSScholar objects.
            self.affiliations.append(tmp_aff)
        else:
            curr = next(
                (x for x in self.affiliations if affiliation == x.name), None)
            curr.nbr_scholars += 1

    def test_david_notkin(self):
        """Mine "David Notkin" and check the miner, analyzer and tabulator.

        The expectations are exact reference values for this scholar.
        """
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()

        # TC1: Test that DBLP returns a result
        # The None check must come before anything iterates the list.
        assert self.scholars is not None
        assert len(self.scholars) == 1
        david = next((scholar for scholar in self.scholars
                      if scholar.name == "David Notkin"), None)
        # Fail with a readable message instead of an AttributeError on None.
        assert david is not None, "David Notkin not among the mined scholars"

        # TC2: Test that David Notkin has 152 DBLP entries
        assert david.dblp_entries == 152

        # TC3: Test that the name is correctly processed
        assert david.name == "David Notkin"

        # TC4: Test that David Notkin has 134 publications after cleaning the list
        assert david.nbr_publications == 134

        # TC5: Test that David Notkin has the correct ratios
        assert david.first_ratio == pytest.approx(0.24, 0.01)
        assert david.sci_ratio == pytest.approx(0.16, 0.01)
        assert david.nbr_sci_publications == 22

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        # NOTE(review): the second positional argument of pytest.approx is
        # the relative tolerance, so 1 accepts any size from 0 up to twice
        # the expected value - confirm whether abs=1 was intended.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(1149, 1)
        assert file_stats_csv.st_size == pytest.approx(67, 1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert david.sss_contrib == 6.02
        assert david.sss_rating == 24.12

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
class TestClass_TwoScholars:
    """Checks for two scholars given as (name, running number, URL) entries."""

    def setup_method(self):
        """Reset the per-test state and define the two scholars to mine."""
        today = str(date.today())
        self.filename_prefix = today + "_swese_"
        self.affiliations = []
        self.scholars = []
        self.test_scholars = [
            ("Simon M. Poulding", "-1", "https://dblp.org/pid/93/6877.xml"),
            ("Richard C. Holt", "-1",
             "https://dblp.org/pid/h/RichardCHolt.xml"),
        ]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every (name, running number, URL) entry.

        The pid is the part of the URL between "pid/" and ".xml". When a URL
        has no "pid/" part, a message is printed and processing stops.
        """
        for person in process_list:
            name, running_number, url = person[0], person[1], person[2]

            # cut the pid out of the url
            try:
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            self.scholars.append(
                SSSScholar(name, running_number, pid, url, affiliation, -1))

            candidate = SSSAffiliation(affiliation)
            if candidate in self.affiliations:
                # already registered: bump the stored object's counter
                known = next(
                    (aff for aff in self.affiliations
                     if affiliation == aff.name), None)
                known.nbr_scholars += 1
            else:
                candidate.nbr_scholars += 1
                self.affiliations.append(candidate)

    def test_simon_poulding(self):
        """Mine both scholars and verify the figures for Simon M. Poulding."""
        self.add_sss_scholars(self.test_scholars, "N/A")
        miner = ScholarMiner(self.filename_prefix, self.scholars,
                             self.affiliations)
        self.miner = miner
        miner.parse_scholars()
        self.scholars = miner.get_scholars()

        # Keep the last entry carrying the expected name, None if there is none.
        hits = [entry for entry in self.scholars
                if entry.name == "Simon M. Poulding"]
        simon = hits[-1] if hits else None

        # TC1: exactly one scholar is left after mining
        assert len(self.scholars) == 1

        # TC2: 48 DBLP entries
        assert simon.dblp_entries == 48

        # TC3: the name is processed correctly
        assert simon.name == "Simon M. Poulding"

        # TC4: 41 publications remain after cleaning the list
        assert simon.nbr_publications == 41

        # TC5: ratios and SCI publication count
        assert simon.first_ratio == pytest.approx(0.37, 0.01)
        assert simon.sci_ratio == pytest.approx(0.17, 0.01)
        assert simon.nbr_sci_publications == 7

        # TC6: writing the results produces both output files
        self.miner.write_results()
        txt_path = self.filename_prefix + "1_miner.txt"
        csv_path = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(txt_path)
        assert os.path.exists(csv_path)

        # TC7: file sizes
        txt_info = os.stat(txt_path)
        csv_info = os.stat(csv_path)
        assert txt_info.st_size == pytest.approx(476, 1)
        assert csv_info.st_size == pytest.approx(139, 1)

        # TC8: analyzer results
        ScholarAnalyzer(
            self.filename_prefix, self.scholars,
            self.affiliations).analyze_individual_research_interests()
        assert simon.sss_contrib == 2.84
        assert simon.sss_rating == 8.5

        # TC10: tabulator runs
        ScholarTabulator(self.filename_prefix, self.scholars,
                         self.affiliations).write_tables()

    def test_richard_holst(self):
        """Mine both scholars and verify that Richard C. Holt is dropped."""
        self.add_sss_scholars(self.test_scholars, "N/A")
        miner = ScholarMiner(self.filename_prefix, self.scholars,
                             self.affiliations)
        self.miner = miner
        miner.parse_scholars()
        self.scholars = miner.get_scholars()

        # Keep the last entry carrying the expected name, None if there is none.
        hits = [entry for entry in self.scholars
                if entry.name == "Richard C. Holt"]
        richard = hits[-1] if hits else None

        # TC1: Richard is removed as a non-SCI first-author
        assert richard is None
class TestClass_OneScholar:
    """End-to-end checks for a single scholar given as (name, running number, URL)."""

    def setup_method(self):
        """Reset the per-test state and define the scholar to mine."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholar = [("David Notkin", "-1",
                              "https://dblp.org/pid/n/DavidNotkin.xml")]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every (name, running number, URL) entry.

        The pid is the part of the URL between "pid/" and ".xml". When a URL
        has no "pid/" part, a message is printed and processing stops.

        Args:
            process_list: sequence of (name, running_number, url) tuples.
            affiliation: affiliation name assigned to every scholar added.
        """
        for person in process_list:
            name = person[0]
            running_number = person[1]
            url = person[2]

            # extract the pid from the url by substringing
            try:
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            self.scholars.append(
                SSSScholar(name, running_number, pid, url, affiliation, -1))

            tmp_aff = SSSAffiliation(affiliation)
            # Membership relies on SSSAffiliation's equality, defined elsewhere.
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                curr = next(
                    (x for x in self.affiliations if affiliation == x.name),
                    None)
                curr.nbr_scholars += 1

    def test_david_notkin(self):
        """Mine "David Notkin" and check the miner, analyzer and tabulator.

        The expectations are exact reference values for this scholar.
        """
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()

        # TC1: Test that DBLP returns a result
        # The None check must come before anything iterates the list.
        assert self.scholars is not None
        assert len(self.scholars) == 1
        david = next((scholar for scholar in self.scholars
                      if scholar.name == "David Notkin"), None)
        # Fail with a readable message instead of an AttributeError on None.
        assert david is not None, "David Notkin not among the mined scholars"

        # TC2: Test that David Notkin has 159 DBLP entries
        assert david.dblp_entries == 159

        # TC3: Test that the name is correctly processed
        assert david.name == "David Notkin"

        # TC4: Test that David Notkin has 126 publications after cleaning the list
        assert david.nbr_publications == 126

        # TC5: Test that David Notkin has the correct ratios
        assert david.first_ratio == pytest.approx(0.18, 0.01)
        assert david.sci_ratio == pytest.approx(0.17, 0.01)
        assert david.nbr_sci_publications == 22

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        # NOTE(review): the second positional argument of pytest.approx is
        # the relative tolerance, so 1 accepts any size from 0 up to twice
        # the expected value - confirm whether abs=1 was intended.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(1149, 1)
        assert file_stats_csv.st_size == pytest.approx(67, 1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert david.sss_contrib == 5.94
        assert david.sss_rating == 21.82

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
class TestClass_TwoScholars:
    """Checks for two scholars given by name."""

    def setup_method(self):
        """Reset the per-test state and define the two scholars to mine."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholars = ["Simon M. Poulding", "Richard C. Holt"]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every author name in process_list.

        A name whose last whitespace-separated word is all digits is split
        into the name proper and that running number; every other name gets
        the running number -1.

        Args:
            process_list: iterable of author name strings.
            affiliation: affiliation name assigned to every scholar added.
        """
        for name in process_list:
            words = name.split()
            if words[-1].isdigit():
                # author has a running number: strip it from the name
                scholar = SSSScholar(" ".join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)
            self._count_affiliation(affiliation)

    def _count_affiliation(self, affiliation):
        """Count one more scholar for affiliation, registering it on first use."""
        tmp_aff = SSSAffiliation(affiliation)
        # Membership relies on SSSAffiliation's equality, defined elsewhere.
        if tmp_aff not in self.affiliations:
            tmp_aff.nbr_scholars += 1
            # Affiliations go into self.affiliations only; self.scholars
            # holds SSSScholar objects.
            self.affiliations.append(tmp_aff)
        else:
            curr = next(
                (x for x in self.affiliations if affiliation == x.name), None)
            curr.nbr_scholars += 1

    def test_simon_poulding(self):
        """Mine both scholars and check the figures for Simon M. Poulding."""
        self.add_sss_scholars(self.test_scholars, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()

        # TC1: Test that DBLP returns a result
        assert len(self.scholars) == 1
        simon = next((scholar for scholar in self.scholars
                      if scholar.name == "Simon M. Poulding"), None)
        # Fail with a readable message instead of an AttributeError on None.
        assert simon is not None, "Simon M. Poulding not among the mined scholars"

        # TC2: Test that Simon Poulding has 48 DBLP entries
        assert simon.dblp_entries == 48

        # TC3: Test that the name is correctly processed
        assert simon.name == "Simon M. Poulding"

        # TC4: Test that Simon Poulding has 42 publications after cleaning the list
        assert simon.nbr_publications == 42

        # TC5: Test that Simon Poulding has the correct ratios
        assert simon.first_ratio == pytest.approx(0.38, 0.01)
        assert simon.sci_ratio == pytest.approx(0.17, 0.01)
        assert simon.nbr_sci_publications == 7

        # TC6: Test write to txt-file
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        # NOTE(review): the second positional argument of pytest.approx is
        # the relative tolerance, so 1 accepts any size from 0 up to twice
        # the expected value - confirm whether abs=1 was intended.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(476, 1)
        assert file_stats_csv.st_size == pytest.approx(139, 1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert simon.sss_contrib == 2.85
        assert simon.sss_rating == 8.75

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()

    def test_richard_holst(self):
        """Mine both scholars and check that Richard C. Holt is dropped."""
        self.add_sss_scholars(self.test_scholars, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()

        richard = next((scholar for scholar in self.scholars
                        if scholar.name == "Richard C. Holt"), None)

        # TC1: Test that Richard is removed as a non-SCI first-author
        assert richard is None
class TestClass_RunningNumber:
    """End-to-end checks for a scholar whose name carries a running number."""

    def setup_method(self):
        """Reset the per-test state and define the scholar to mine."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholar = ["Thomas Olsson 0001"]

    def add_sss_scholars(self, process_list, affiliation):
        """Create an SSSScholar for every author name in process_list.

        A name whose last whitespace-separated word is all digits is split
        into the name proper and that running number; every other name gets
        the running number -1.

        Args:
            process_list: iterable of author name strings.
            affiliation: affiliation name assigned to every scholar added.
        """
        for name in process_list:
            words = name.split()
            if words[-1].isdigit():
                # author has a running number: strip it from the name
                scholar = SSSScholar(" ".join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)
            self._count_affiliation(affiliation)

    def _count_affiliation(self, affiliation):
        """Count one more scholar for affiliation, registering it on first use."""
        tmp_aff = SSSAffiliation(affiliation)
        # Membership relies on SSSAffiliation's equality, defined elsewhere.
        if tmp_aff not in self.affiliations:
            tmp_aff.nbr_scholars += 1
            # Affiliations go into self.affiliations only; self.scholars
            # holds SSSScholar objects.
            self.affiliations.append(tmp_aff)
        else:
            curr = next(
                (x for x in self.affiliations if affiliation == x.name), None)
            curr.nbr_scholars += 1

    def test_thomas_olsson(self):
        """Mine "Thomas Olsson 0001" and check the miner, analyzer and tabulator.

        The expectations are lower bounds (>=), not exact values.
        """
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()

        # TC1: Test that DBLP returns a result
        # The None check must come before anything iterates the list.
        assert self.scholars is not None
        assert len(self.scholars) == 1
        thomas = next((scholar for scholar in self.scholars
                       if scholar.name == "Thomas Olsson"), None)
        # Fail with a readable message instead of an AttributeError on None.
        assert thomas is not None, "Thomas Olsson not among the mined scholars"

        # TC2: Test that Thomas Olsson has at least 40 DBLP entries
        assert thomas.dblp_entries >= 40

        # TC3: Test that the name is correctly processed
        assert thomas.name == "Thomas Olsson"

        # TC4: Test that Thomas Olsson has at least 30 publications after cleaning the list
        assert thomas.nbr_publications >= 30

        # TC5: Test that Thomas Olsson has non-zero ratios
        assert thomas.first_ratio >= 0.01
        assert thomas.sci_ratio >= 0.01
        assert thomas.nbr_sci_publications >= 1

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size > 0
        assert file_stats_csv.st_size > 0

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert thomas.sss_contrib >= 1.50
        assert thomas.sss_rating >= 1.00

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()