def test_nonsense(self):
        """Mine the nonsense entries and expect an empty scholar list.

        Registers ``self.test_nonsense`` under the "N/A" affiliation, runs
        ``ScholarMiner.process_group()`` and checks that the miner hands back
        no scholars.
        """
        self.add_sss_scholars(self.test_nonsense, "N/A")

        miner = ScholarMiner(self.filename_prefix, self.scholars, self.affiliations)
        self.miner = miner
        miner.process_group()
        mined = miner.get_scholars()
        self.scholars = mined

        # TC1: nothing may come back for nonsense input
        assert len(mined) == 0
Example #2
0
    def test_mauro_caporuscio(self):
        """Run miner, analyzer and tabulator for "Mauro Caporuscio".

        Uses the single entry in ``self.test_nonascii_scholar``. The
        assertions run after the lookup loop rather than inside its ``if``:
        when they were nested in the name match, a run in which the scholar
        was missing executed no assertion at all and the test passed
        vacuously.
        """
        self.add_sss_scholars(self.test_nonascii_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        # This involves dealing with non-ASCII characters
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()
        mauro = None
        for scholar in self.scholars:
            if scholar.name == "Mauro Caporuscio":
                mauro = scholar

        # TC1: Test that DBLP returns a result
        assert self.scholars is not None
        assert len(self.scholars) == 1
        assert mauro is not None, "Mauro Caporuscio missing from mined scholars"

        # TC2: Test that Mauro has at least 40 DBLP entries
        assert mauro.dblp_entries >= 40

        # TC3: Test that the name is correctly processed
        assert mauro.name == "Mauro Caporuscio"

        # TC4: Test that Mauro has at least 30 publications after cleaning the list
        assert mauro.nbr_publications >= 30

        # TC5: Test that Mauro has non-zero ratios
        assert mauro.first_ratio >= 0.01
        assert mauro.sci_ratio >= 0.01
        assert mauro.nbr_sci_publications >= 1

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size > 0
        assert file_stats_csv.st_size > 0

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert mauro.sss_contrib >= 1.50
        assert mauro.sss_rating >= 1.00

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix,
                                     self.scholars, self.affiliations)
        tabulator.write_tables()
Example #3
0
    def test_david_notkin(self):
        """Run miner, analyzer and tabulator for the scholar "David Notkin".

        The expected figures below are hard-coded snapshots.
        NOTE(review): they presumably track live DBLP data and may need
        refreshing over time; confirm.
        """
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()
        david = None
        for scholar in self.scholars:
            if scholar.name == "David Notkin":
                david = scholar

        # TC1: Test that DBLP returns a result
        assert self.scholars is not None
        assert len(self.scholars) == 1
        # Fail with a readable message instead of an AttributeError below.
        assert david is not None, "David Notkin missing from mined scholars"

        # TC2: Test that David Notkin has 152 DBLP entries
        assert david.dblp_entries == 152

        # TC3: Test that the name is correctly processed
        assert david.name == "David Notkin"

        # TC4: Test that David Notkin has 134 publications after cleaning the list
        assert david.nbr_publications == 134

        # TC5: Test that David Notkin has the correct ratios
        # (the second positional argument of approx is ``rel``; spelled out here)
        assert david.first_ratio == pytest.approx(0.24, rel=0.01)
        assert david.sci_ratio == pytest.approx(0.16, rel=0.01)
        assert david.nbr_sci_publications == 22

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes (within one byte).
        # A positional 1 is a *relative* tolerance of 100 %, which accepted
        # any size from 0 up to twice the expected value; ``abs=1`` is the
        # one-byte tolerance that was meant.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(1149, abs=1)
        assert file_stats_csv.st_size == pytest.approx(67, abs=1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert david.sss_contrib == 6.02
        assert david.sss_rating == 24.12

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
    def test_thomas_olsson(self):
        """Run miner, analyzer and tabulator for the scholar "Thomas Olsson".

        Only lower bounds are asserted, so the test tolerates a growing
        publication record.
        """
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()
        thomas = None
        for scholar in self.scholars:
            if scholar.name == "Thomas Olsson":
                thomas = scholar

        # TC1: Test that DBLP returns a result
        assert self.scholars is not None
        assert len(self.scholars) == 1
        # Fail with a readable message instead of an AttributeError below.
        assert thomas is not None, "Thomas Olsson missing from mined scholars"

        # TC2: Test that Thomas Olsson has at least 40 DBLP entries
        assert thomas.dblp_entries >= 40

        # TC3: Test that the name is correctly processed
        assert thomas.name == "Thomas Olsson"

        # TC4: Test that Thomas Olsson has at least 30 publications after cleaning the list
        assert thomas.nbr_publications >= 30

        # TC5: Test that Thomas Olsson has non-zero ratios
        assert thomas.first_ratio >= 0.01
        assert thomas.sci_ratio >= 0.01
        assert thomas.nbr_sci_publications >= 1

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size > 0
        assert file_stats_csv.st_size > 0

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert thomas.sss_contrib >= 1.50
        assert thomas.sss_rating >= 1.00

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
Example #5
0
    def test_simon_poulding(self):
        """Run miner, analyzer and tabulator and verify "Simon M. Poulding".

        All entries of ``self.test_scholars`` are registered, but exactly one
        scholar is expected to remain after mining.
        NOTE(review): the exact figures are hard-coded snapshots that
        presumably track live DBLP data; confirm.
        """
        self.add_sss_scholars(self.test_scholars, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()
        simon = None
        for scholar in self.scholars:
            if scholar.name == "Simon M. Poulding":
                simon = scholar

        # TC1: Test that DBLP returns a result
        assert len(self.scholars) == 1
        # Fail with a readable message instead of an AttributeError below.
        assert simon is not None, "Simon M. Poulding missing from mined scholars"

        # TC2: Test that Simon Poulding has 48 DBLP entries
        assert simon.dblp_entries == 48

        # TC3: Test that the name is correctly processed
        assert simon.name == "Simon M. Poulding"

        # TC4: Test that Simon Poulding has 41 publications after cleaning the list
        assert simon.nbr_publications == 41

        # TC5: Test that Simon Poulding has the correct ratios
        # (the second positional argument of approx is ``rel``; spelled out here)
        assert simon.first_ratio == pytest.approx(0.37, rel=0.01)
        assert simon.sci_ratio == pytest.approx(0.17, rel=0.01)
        assert simon.nbr_sci_publications == 7

        # TC6: Test write to txt-file
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes (within one byte).
        # A positional 1 is a *relative* tolerance of 100 %, which accepted
        # any size from 0 up to twice the expected value; ``abs=1`` is the
        # one-byte tolerance that was meant.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(476, abs=1)
        assert file_stats_csv.st_size == pytest.approx(139, abs=1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert simon.sss_contrib == 2.84
        assert simon.sss_rating == 8.5

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
Example #6
0
    def test_richard_holst(self):
        """Check that "Richard C. Holt" is absent from the mined scholars."""
        self.add_sss_scholars(self.test_scholars, "N/A")
        miner = ScholarMiner(self.filename_prefix, self.scholars,
                             self.affiliations)
        self.miner = miner
        miner.parse_scholars()
        self.scholars = miner.get_scholars()

        # Keep the last entry carrying the name, or None when there is none.
        matches = [s for s in self.scholars if s.name == "Richard C. Holt"]
        richard = matches[-1] if matches else None

        # TC1: Test that Richard is removed as a non-SCI first-author
        assert richard is None
Example #7
0
class TestClass_NonSense:
    """Checks that a nonsensical input entry yields no mined scholars."""

    def setup_method(self):
        """Reset the lists, the output prefix and the test input."""
        self.test_nonsense = [("ABCDEFGH", "ijklmno", "A¿£$‰")]
        self.filename_prefix = f"{date.today()}_swese_"
        self.affiliations = []
        self.scholars = []

    def add_sss_scholars(self, process_list, affiliation):
        """Register each ``(name, running_number, url)`` entry.

        The pid is cut out of ``url``; a URL without ``pid/`` prints an
        error and ends the whole call, so later entries are not added.
        Each added scholar bumps the scholar counter of ``affiliation``.
        """
        for entry in process_list:
            name, running_number, url = entry[0], entry[1], entry[2]
            try:
                # the pid sits between "pid/" and ".xml"
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            scholar = SSSScholar(name, running_number, pid, url, affiliation,
                                 -1)
            self.scholars.append(scholar)

            candidate = SSSAffiliation(affiliation)
            if candidate in self.affiliations:
                # Already registered: count on the stored instance.
                known = next(
                    (aff for aff in self.affiliations
                     if affiliation == aff.name),
                    None)
                known.nbr_scholars += 1
            else:
                candidate.nbr_scholars += 1
                self.affiliations.append(candidate)

    def test_nonsense(self):
        """Expect an empty scholar list after mining the nonsense entry.

        The test URL contains no ``pid/``, so ``add_sss_scholars`` returns
        before adding anything and the miner starts from an empty list.
        """
        self.add_sss_scholars(self.test_nonsense, "N/A")

        miner = ScholarMiner(self.filename_prefix, self.scholars,
                             self.affiliations)
        self.miner = miner
        miner.parse_scholars()
        mined = miner.get_scholars()
        self.scholars = mined

        # TC1: nothing may come back for nonsense input
        assert len(mined) == 0
class TestClass_NonSense:
    """Checks that nonsensical scholar names yield no mined scholars."""

    def setup_method(self):
        """Reset the lists, the output prefix and the test input."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_nonsense = ["ABCDEFGH", "A¿£$‰"]

    def add_sss_scholars(self, process_list, affiliation):
        """Register every name in ``process_list`` under ``affiliation``.

        A trailing all-digit word is treated as the running number and is
        stripped from the name; otherwise the running number is ``-1``.

        Args:
            process_list: Iterable of scholar name strings.
            affiliation: Affiliation name shared by all entries.
        """
        for name in process_list:
            words = name.split()
            # check if author has a running number
            if words[-1].isdigit():
                scholar = SSSScholar(" ".join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)

            # Affiliation bookkeeping is shared by both cases. The former
            # duplicated branch appended a new affiliation to self.scholars
            # instead of self.affiliations.
            tmp_aff = SSSAffiliation(affiliation)
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                curr = next((x for x in self.affiliations if affiliation == x.name), None)
                curr.nbr_scholars += 1

    def test_nonsense(self):
        """Mine the nonsense names and expect an empty scholar list."""
        self.add_sss_scholars(self.test_nonsense, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars, self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()

        # TC1: Test that DBLP does not return anything
        assert len(self.scholars) == 0
Example #9
0
else:
    custom_list = []
    custom_list.append(sys.argv[1])
    add_sss_scholars(custom_list, "N/A")

# Prepare the process
subdirectory = "output"
try:
    os.mkdir(subdirectory)
except FileExistsError:
    # The directory already exists; reuse it. Any other OSError is no
    # longer swallowed silently.
    pass
filename_prefix = os.path.join(subdirectory, str(date.today()) + "_sss_")

# 1. Mine the scholars, write the results
print("####### Step 1 - Mining scholars #######")
miner = ScholarMiner(filename_prefix, sss_scholars, sss_affiliations)
miner.process_group()
miner.write_results()
sss_scholars = miner.get_scholars()

# 2. Analyze the scholars, remove affiliations with no first-authored SCI publications, write the results
print("\n####### Step 2 - Analyzing scholars #######")
# Sum each scholar's first-authored SCI count into its affiliation.
for scholar in sss_scholars:
    # NOTE(review): curr is None when no affiliation carries the scholar's
    # affiliation name; the next line would then raise AttributeError.
    curr = next((x for x in sss_affiliations if scholar.affiliation == x.name),
                None)
    curr.nbr_first_sci += scholar.nbr_first_sci
tmp_affiliations = []
for affiliation in sss_affiliations:
    # keep only affiliations with at least one first-authored SCI publication
    if affiliation.nbr_first_sci > 0:
        tmp_affiliations.append(affiliation)
Example #10
0
class TestClass_NonASCIITitles:
    """End-to-end test for a scholar whose data involves non-ASCII text."""

    def setup_method(self):
        """Reset the lists, the output prefix and the test input."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_nonascii_scholar = [
            ("Mauro Caporuscio", "-1",
             "https://dblp.org/pid/c/MauroCaporuscio.xml")
        ]

    def add_sss_scholars(self, process_list, affiliation):
        """Register every person in ``process_list`` under ``affiliation``.

        Args:
            process_list: Iterable of ``(name, running_number, url)``
                entries; ``url`` is expected to contain ``pid/<pid>.xml``.
            affiliation: Affiliation name shared by all entries.

        A URL without ``pid/`` prints an error and ends the call; entries
        after it are not added.
        """
        for person in process_list:
            name = person[0]
            running_number = person[1]
            url = person[2]
            # extract the pid from the url by substringing
            try:
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            self.scholars.append(
                SSSScholar(name, running_number, pid, url, affiliation, -1))
            tmp_aff = SSSAffiliation(affiliation)
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                # Already registered: count on the stored instance.
                curr = next(
                    (x for x in self.affiliations if affiliation == x.name),
                    None)
                curr.nbr_scholars += 1

    def test_mauro_caporuscio(self):
        """Run miner, analyzer and tabulator for "Mauro Caporuscio".

        The assertions run after the lookup loop rather than inside its
        ``if``: when they were nested in the name match, a run in which the
        scholar was missing executed no assertion at all and the test passed
        vacuously.
        """
        self.add_sss_scholars(self.test_nonascii_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        # This involves dealing with non-ASCII characters
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()
        mauro = None
        for scholar in self.scholars:
            if scholar.name == "Mauro Caporuscio":
                mauro = scholar

        # TC1: Test that DBLP returns a result
        assert self.scholars is not None
        assert len(self.scholars) == 1
        assert mauro is not None, "Mauro Caporuscio missing from mined scholars"

        # TC2: Test that Mauro has at least 40 DBLP entries
        assert mauro.dblp_entries >= 40

        # TC3: Test that the name is correctly processed
        assert mauro.name == "Mauro Caporuscio"

        # TC4: Test that Mauro has at least 30 publications after cleaning the list
        assert mauro.nbr_publications >= 30

        # TC5: Test that Mauro has non-zero ratios
        assert mauro.first_ratio >= 0.01
        assert mauro.sci_ratio >= 0.01
        assert mauro.nbr_sci_publications >= 1

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size > 0
        assert file_stats_csv.st_size > 0

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert mauro.sss_contrib >= 1.50
        assert mauro.sss_rating >= 1.00

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix,
                                     self.scholars, self.affiliations)
        tabulator.write_tables()
Example #11
0
class TestClass_OneScholar:
    """End-to-end test for a single scholar given by name."""

    def setup_method(self):
        """Reset the lists, the output prefix and the test input."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholar = ["David Notkin"]

    def add_sss_scholars(self, process_list, affiliation):
        """Register every name in ``process_list`` under ``affiliation``.

        A trailing all-digit word is treated as the running number and is
        stripped from the name; otherwise the running number is ``-1``.

        Args:
            process_list: Iterable of scholar name strings.
            affiliation: Affiliation name shared by all entries.
        """
        for name in process_list:
            words = name.split()
            # check if author has a running number
            if words[-1].isdigit():
                scholar = SSSScholar(" ".join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)

            # Affiliation bookkeeping is shared by both cases. The former
            # duplicated branch appended a new affiliation to self.scholars
            # instead of self.affiliations.
            tmp_aff = SSSAffiliation(affiliation)
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                curr = next(
                    (x for x in self.affiliations if affiliation == x.name),
                    None)
                curr.nbr_scholars += 1

    def test_david_notkin(self):
        """Run miner, analyzer and tabulator for the scholar "David Notkin".

        The expected figures below are hard-coded snapshots.
        NOTE(review): they presumably track live DBLP data and may need
        refreshing over time; confirm.
        """
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()
        david = None
        for scholar in self.scholars:
            if scholar.name == "David Notkin":
                david = scholar

        # TC1: Test that DBLP returns a result
        assert self.scholars is not None
        assert len(self.scholars) == 1
        # Fail with a readable message instead of an AttributeError below.
        assert david is not None, "David Notkin missing from mined scholars"

        # TC2: Test that David Notkin has 152 DBLP entries
        assert david.dblp_entries == 152

        # TC3: Test that the name is correctly processed
        assert david.name == "David Notkin"

        # TC4: Test that David Notkin has 134 publications after cleaning the list
        assert david.nbr_publications == 134

        # TC5: Test that David Notkin has the correct ratios
        # (the second positional argument of approx is ``rel``; spelled out here)
        assert david.first_ratio == pytest.approx(0.24, rel=0.01)
        assert david.sci_ratio == pytest.approx(0.16, rel=0.01)
        assert david.nbr_sci_publications == 22

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes (within one byte).
        # A positional 1 is a *relative* tolerance of 100 %, which accepted
        # any size from 0 up to twice the expected value; ``abs=1`` is the
        # one-byte tolerance that was meant.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(1149, abs=1)
        assert file_stats_csv.st_size == pytest.approx(67, abs=1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert david.sss_contrib == 6.02
        assert david.sss_rating == 24.12

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
Example #12
0
class TestClass_TwoScholars:
    """End-to-end tests with two input scholars, one of which is filtered out."""

    def setup_method(self):
        """Reset the lists, the output prefix and the test input."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholars = [("Simon M. Poulding", "-1",
                               "https://dblp.org/pid/93/6877.xml"),
                              ("Richard C. Holt", "-1",
                               "https://dblp.org/pid/h/RichardCHolt.xml")]

    def add_sss_scholars(self, process_list, affiliation):
        """Register every person in ``process_list`` under ``affiliation``.

        Args:
            process_list: Iterable of ``(name, running_number, url)``
                entries; ``url`` is expected to contain ``pid/<pid>.xml``.
            affiliation: Affiliation name shared by all entries.

        A URL without ``pid/`` prints an error and ends the call; entries
        after it are not added.
        """
        for person in process_list:
            name = person[0]
            running_number = person[1]
            url = person[2]
            # extract the pid from the url by substringing
            try:
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            self.scholars.append(
                SSSScholar(name, running_number, pid, url, affiliation, -1))
            tmp_aff = SSSAffiliation(affiliation)
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                # Already registered: count on the stored instance.
                curr = next(
                    (x for x in self.affiliations if affiliation == x.name),
                    None)
                curr.nbr_scholars += 1

    def test_simon_poulding(self):
        """Run miner, analyzer and tabulator and verify "Simon M. Poulding".

        Both entries of ``self.test_scholars`` are registered, but exactly
        one scholar is expected to remain after mining.
        NOTE(review): the exact figures are hard-coded snapshots that
        presumably track live DBLP data; confirm.
        """
        self.add_sss_scholars(self.test_scholars, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()
        simon = None
        for scholar in self.scholars:
            if scholar.name == "Simon M. Poulding":
                simon = scholar

        # TC1: Test that DBLP returns a result
        assert len(self.scholars) == 1
        # Fail with a readable message instead of an AttributeError below.
        assert simon is not None, "Simon M. Poulding missing from mined scholars"

        # TC2: Test that Simon Poulding has 48 DBLP entries
        assert simon.dblp_entries == 48

        # TC3: Test that the name is correctly processed
        assert simon.name == "Simon M. Poulding"

        # TC4: Test that Simon Poulding has 41 publications after cleaning the list
        assert simon.nbr_publications == 41

        # TC5: Test that Simon Poulding has the correct ratios
        # (the second positional argument of approx is ``rel``; spelled out here)
        assert simon.first_ratio == pytest.approx(0.37, rel=0.01)
        assert simon.sci_ratio == pytest.approx(0.17, rel=0.01)
        assert simon.nbr_sci_publications == 7

        # TC6: Test write to txt-file
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes (within one byte).
        # A positional 1 is a *relative* tolerance of 100 %, which accepted
        # any size from 0 up to twice the expected value; ``abs=1`` is the
        # one-byte tolerance that was meant.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(476, abs=1)
        assert file_stats_csv.st_size == pytest.approx(139, abs=1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert simon.sss_contrib == 2.84
        assert simon.sss_rating == 8.5

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()

    def test_richard_holst(self):
        """Check that "Richard C. Holt" is absent from the mined scholars."""
        self.add_sss_scholars(self.test_scholars, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()
        richard = None
        for scholar in self.scholars:
            if scholar.name == "Richard C. Holt":
                richard = scholar

        # TC1: Test that Richard is removed as a non-SCI first-author
        assert richard is None
Example #13
0
class TestClass_OneScholar:
    """End-to-end test for a single scholar given by name and DBLP URL."""

    def setup_method(self):
        """Reset the lists, the output prefix and the test input."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholar = [("David Notkin", "-1",
                              "https://dblp.org/pid/n/DavidNotkin.xml")]

    def add_sss_scholars(self, process_list, affiliation):
        """Register every person in ``process_list`` under ``affiliation``.

        Args:
            process_list: Iterable of ``(name, running_number, url)``
                entries; ``url`` is expected to contain ``pid/<pid>.xml``.
            affiliation: Affiliation name shared by all entries.

        A URL without ``pid/`` prints an error and ends the call; entries
        after it are not added.
        """
        for person in process_list:
            name = person[0]
            running_number = person[1]
            url = person[2]
            # extract the pid from the url by substringing
            try:
                pid = url.split("pid/")[1].split(".xml")[0]
            except IndexError:
                print("Invalid format of input XML URL.")
                return

            self.scholars.append(
                SSSScholar(name, running_number, pid, url, affiliation, -1))
            tmp_aff = SSSAffiliation(affiliation)
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                # Already registered: count on the stored instance.
                curr = next(
                    (x for x in self.affiliations if affiliation == x.name),
                    None)
                curr.nbr_scholars += 1

    def test_david_notkin(self):
        """Run miner, analyzer and tabulator for the scholar "David Notkin".

        The expected figures below are hard-coded snapshots.
        NOTE(review): they presumably track live DBLP data and may need
        refreshing over time; confirm.
        """
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.parse_scholars()
        self.scholars = self.miner.get_scholars()
        david = None
        for scholar in self.scholars:
            if scholar.name == "David Notkin":
                david = scholar

        # TC1: Test that DBLP returns a result
        assert self.scholars is not None
        assert len(self.scholars) == 1
        # Fail with a readable message instead of an AttributeError below.
        assert david is not None, "David Notkin missing from mined scholars"

        # TC2: Test that David Notkin has 159 DBLP entries
        assert david.dblp_entries == 159

        # TC3: Test that the name is correctly processed
        assert david.name == "David Notkin"

        # TC4: Test that David Notkin has 126 publications after cleaning the list
        assert david.nbr_publications == 126

        # TC5: Test that David Notkin has the correct ratios
        # (the second positional argument of approx is ``rel``; spelled out here)
        assert david.first_ratio == pytest.approx(0.18, rel=0.01)
        assert david.sci_ratio == pytest.approx(0.17, rel=0.01)
        assert david.nbr_sci_publications == 22

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes (within one byte).
        # A positional 1 is a *relative* tolerance of 100 %, which accepted
        # any size from 0 up to twice the expected value; ``abs=1`` is the
        # one-byte tolerance that was meant.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(1149, abs=1)
        assert file_stats_csv.st_size == pytest.approx(67, abs=1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert david.sss_contrib == 5.94
        assert david.sss_rating == 21.82

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()
Example #14
0
class TestClass_TwoScholars:
    """End-to-end tests with two scholar names, one of which is filtered out."""

    def setup_method(self):
        """Reset the lists, the output prefix and the test input."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholars = ["Simon M. Poulding", "Richard C. Holt"]

    def add_sss_scholars(self, process_list, affiliation):
        """Register every name in ``process_list`` under ``affiliation``.

        A trailing all-digit word is treated as the running number and is
        stripped from the name; otherwise the running number is ``-1``.

        Args:
            process_list: Iterable of scholar name strings.
            affiliation: Affiliation name shared by all entries.
        """
        for name in process_list:
            words = name.split()
            # check if author has a running number
            if words[-1].isdigit():
                scholar = SSSScholar(" ".join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)

            # Affiliation bookkeeping is shared by both cases. The former
            # duplicated branch appended a new affiliation to self.scholars
            # instead of self.affiliations.
            tmp_aff = SSSAffiliation(affiliation)
            if tmp_aff not in self.affiliations:
                tmp_aff.nbr_scholars += 1
                self.affiliations.append(tmp_aff)
            else:
                curr = next(
                    (x for x in self.affiliations if affiliation == x.name),
                    None)
                curr.nbr_scholars += 1

    def test_simon_poulding(self):
        """Run miner, analyzer and tabulator and verify "Simon M. Poulding".

        Both names in ``self.test_scholars`` are registered, but exactly one
        scholar is expected to remain after mining.
        NOTE(review): the exact figures are hard-coded snapshots that
        presumably track live DBLP data; confirm.
        """
        self.add_sss_scholars(self.test_scholars, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()
        simon = None
        for scholar in self.scholars:
            if scholar.name == "Simon M. Poulding":
                simon = scholar

        # TC1: Test that DBLP returns a result
        assert len(self.scholars) == 1
        # Fail with a readable message instead of an AttributeError below.
        assert simon is not None, "Simon M. Poulding missing from mined scholars"

        # TC2: Test that Simon Poulding has 48 DBLP entries
        assert simon.dblp_entries == 48

        # TC3: Test that the name is correctly processed
        assert simon.name == "Simon M. Poulding"

        # TC4: Test that Simon Poulding has 42 publications after cleaning the list
        assert simon.nbr_publications == 42

        # TC5: Test that Simon Poulding has the correct ratios
        # (the second positional argument of approx is ``rel``; spelled out here)
        assert simon.first_ratio == pytest.approx(0.38, rel=0.01)
        assert simon.sci_ratio == pytest.approx(0.17, rel=0.01)
        assert simon.nbr_sci_publications == 7

        # TC6: Test write to txt-file
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes (within one byte).
        # A positional 1 is a *relative* tolerance of 100 %, which accepted
        # any size from 0 up to twice the expected value; ``abs=1`` is the
        # one-byte tolerance that was meant.
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size == pytest.approx(476, abs=1)
        assert file_stats_csv.st_size == pytest.approx(139, abs=1)

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert simon.sss_contrib == 2.85
        assert simon.sss_rating == 8.75

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()

    def test_richard_holst(self):
        """Check that "Richard C. Holt" is absent from the mined scholars."""
        self.add_sss_scholars(self.test_scholars, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()
        richard = None
        for scholar in self.scholars:
            if scholar.name == "Richard C. Holt":
                richard = scholar

        # TC1: Test that Richard is removed as a non-SCI first-author
        assert richard is None
Example #15
0
class TestClass_RunningNumber:
    """Tests for a scholar whose name carries a trailing running number.

    The fixture name is "Thomas Olsson 0001"; ``add_sss_scholars`` splits the
    numeric suffix off so the scholar is registered as "Thomas Olsson" with
    running number "0001".
    """

    def setup_method(self):
        """Reset the scholar/affiliation lists and output prefix per test."""
        self.scholars = []
        self.affiliations = []
        self.filename_prefix = str(date.today()) + "_swese_"
        self.test_scholar = ["Thomas Olsson 0001"]

    def _register_affiliation(self, affiliation):
        """Count one more scholar for ``affiliation``, adding it if unseen.

        Args:
            affiliation: Affiliation name passed to ``SSSAffiliation``.
        """
        tmp_aff = SSSAffiliation(affiliation)
        if tmp_aff not in self.affiliations:
            tmp_aff.nbr_scholars += 1
            self.affiliations.append(tmp_aff)
        else:
            # NOTE(review): membership above relies on SSSAffiliation
            # equality while this lookup matches on ``name``; assumes the two
            # agree -- confirm against SSSAffiliation.__eq__.
            curr = next(
                (x for x in self.affiliations if affiliation == x.name),
                None)
            curr.nbr_scholars += 1

    def add_sss_scholars(self, process_list, affiliation):
        """Append a scholar per name and keep affiliation counts in sync.

        A name whose last whitespace-separated token is all digits is treated
        as "<name> <running number>"; the number is stripped from the name and
        passed on as a string. Names without such a suffix get running
        number ``-1``.

        Args:
            process_list: Iterable of scholar name strings.
            affiliation: Affiliation name shared by all scholars in the list.
        """
        for name in process_list:
            words = name.split()
            if words[-1].isdigit():
                # author has a running number
                scholar = SSSScholar(' '.join(words[:-1]), words[-1],
                                     affiliation)
            else:
                scholar = SSSScholar(name, -1, affiliation)
            self.scholars.append(scholar)
            # The affiliation goes into self.affiliations in both cases; the
            # un-numbered path previously appended it to self.scholars.
            self._register_affiliation(affiliation)

    def test_thomas_olsson(self):
        """Run the miner, analyzer and tabulator for "Thomas Olsson 0001"."""
        self.add_sss_scholars(self.test_scholar, "N/A")
        self.miner = ScholarMiner(self.filename_prefix, self.scholars,
                                  self.affiliations)
        self.miner.process_group()
        self.scholars = self.miner.get_scholars()
        thomas = None
        for scholar in self.scholars:
            if scholar.name == "Thomas Olsson":
                thomas = scholar

        # TC1: Test that DBLP returns a result
        assert self.scholars is not None
        assert len(self.scholars) == 1
        # Fail with a clear assertion (not an AttributeError below) if the
        # returned scholar is not named "Thomas Olsson".
        assert thomas is not None

        # TC2: Test that Thomas Olsson has at least 40 DBLP entries
        assert thomas.dblp_entries >= 40

        # TC3: Test that the name is correctly processed
        assert thomas.name == "Thomas Olsson"

        # TC4: Test that Thomas Olsson has at least 30 publications after cleaning the list
        assert thomas.nbr_publications >= 30

        # TC5: Test that Thomas Olsson has non-zero ratios
        assert thomas.first_ratio >= 0.01
        assert thomas.sci_ratio >= 0.01
        assert thomas.nbr_sci_publications >= 1

        # TC6: Test write results
        self.miner.write_results()
        filename_txt = self.filename_prefix + "1_miner.txt"
        filename_csv = self.filename_prefix + "1_miner.csv"
        assert os.path.exists(filename_txt)
        assert os.path.exists(filename_csv)

        # TC7: Test file sizes
        file_stats_txt = os.stat(filename_txt)
        file_stats_csv = os.stat(filename_csv)
        assert file_stats_txt.st_size > 0
        assert file_stats_csv.st_size > 0

        # TC8: Test analyzer
        analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars,
                                   self.affiliations)
        analyzer.analyze_individual_research_interests()
        assert thomas.sss_contrib >= 1.50
        assert thomas.sss_rating >= 1.00

        # TC10: Test tabulator
        tabulator = ScholarTabulator(self.filename_prefix, self.scholars,
                                     self.affiliations)
        tabulator.write_tables()