예제 #1
0
    def test_when_pencil_write_at_is_passed_a_string_and_an_index_greater_than_paper_text_length_it_adds_text_to_end(self):
        """write_at with an index past the end of the text must append the new text."""
        sheet = Paper()
        sheet.text = "An apple a day keeps the doctor away"
        addition = ", don't you know."

        # Index 40 lies beyond the 36-character sentence already on the paper.
        self.pencil.write_at(sheet, addition, 40)

        expected = "An apple a day keeps the doctor away, don't you know."
        self.assertEqual(sheet.text, expected)
예제 #2
0
    def test_when_pencil_write_at_is_passed_a_string_and_an_index_that_is_in_bounds_but_string_len_plus_index_is_greater_than_paper_text_length_it_overwrites_and_addes_on(self):
        """write_at that starts in bounds but runs past the end overwrites, then appends.

        The expected text shows the four characters that collide with "away"
        replaced by "@" and the rest of the string added after the old end.
        """
        sheet = Paper()
        sheet.text = "An apple a day keeps the doctor away"
        replacement = "from coming around."

        self.pencil.write_at(sheet, replacement, 32)

        expected = "An apple a day keeps the doctor @@@@ coming around."
        self.assertEqual(sheet.text, expected)
예제 #3
0
    def test_when_pencil_write_at_is_passed_a_string_and_an_index_it_will_write_the_string_on_the_paper_at_that_index_and_overwrite_filled_spaces_with_symbol(self):
        """write_at fills blanks with the new letters and marks collisions with "@".

        "artichoke" is longer than the blank gap, so per the expected text the
        two letters landing on existing characters become "@".
        """
        sheet = Paper()
        sheet.text = "An       a day keeps the doctor away"
        word = "artichoke"

        self.pencil.write_at(sheet, word, 3)

        expected = "An artich@k@ay keeps the doctor away"
        self.assertEqual(sheet.text, expected)
예제 #4
0
    def test_when_pencil_write_at_is_passed_a_string_and_an_index_it_will_write_the_string_on_the_paper_at_that_index_on_white_space(self):
        """write_at into a gap wide enough for the word writes it without any "@" marks."""
        sheet = Paper()
        sheet.text = "An       a day keeps the doctor away"
        word = "onion"

        self.pencil.write_at(sheet, word, 3)

        expected = "An onion a day keeps the doctor away"
        self.assertEqual(sheet.text, expected)
예제 #5
0
 def __init__(self, papers, id_to_name, author_papers, treat_id_different_people=False,
              console_log_level=logging.ERROR, file_log_level=logging.DEBUG, log_format=None, log_path=None,
              raise_error=False, skip_error_papers=False, one_target_per_paper=False, save_data=False, ext_directory=False, save_path=None,
              cores=4, remove_all_papers=False):
     """Set up logging, normalise the paper collection and store the options.

     :param papers: mapping of paper id to either a ``Paper`` object or a
         dict of ``Paper`` keyword arguments.
     :param id_to_name: mapping that is deep-copied into ``self.id_to_name``.
     :param author_papers: mapping that is deep-copied into ``self.author_papers``.
     :param log_format: logging format string; a default is used when falsy.
     :param log_path: log file path; defaults to ``<cwd>/logs/disambiguation.log``.

     All remaining arguments are stored on the instance (note that
     ``skip_error_papers`` is stored as ``skip_errors`` and
     ``one_target_per_paper`` as ``one_per_paper``).
     """
     log_format = log_format or '%(asctime)s|%(levelname)8s|%(module)20s|%(funcName)20s: %(message)s'
     log_path = log_path or os.getcwd() + "/logs/disambiguation.log"
     self.logger = createLogger("disambiguator", log_path, log_format, console_log_level, file_log_level)
     self.console_log_level = console_log_level
     self.treat_id_different_people = treat_id_different_people

     # Convert raw dicts to Paper objects. As soon as a value that already is
     # a Paper is seen, the caller's mapping is adopted unchanged instead.
     self.papers = {}
     for paper_id, raw_paper in papers.items():
         if isinstance(raw_paper, Paper):
             self.papers = papers
             break
         self.papers[paper_id] = Paper(**raw_paper)

     # Private copies of the caller's lookup tables.
     self.id_to_name = deepcopy(id_to_name)
     self.author_papers = deepcopy(author_papers)

     # Bookkeeping containers, all empty at construction time.
     self.author_id_suffix = Counter()
     self.error_papers = set()
     self.new_papers = {}
     self.new_author_papers = defaultdict(list)
     self.new_id_to_name = {}
     self.old_ids = set()

     # Behaviour switches and output options.
     self.raise_error = raise_error
     self.skip_errors = skip_error_papers
     self.one_per_paper = one_target_per_paper
     self.save_data = save_data
     self.ext_directory = ext_directory
     self.save_path = save_path
     self.cores = cores
     self.remove_all_papers = remove_all_papers
예제 #6
0
    def _process_results(self, url: str, timeout: float = 30.0) -> List[Dict]:
        """Download *url* and return the JSON form of every ``entry`` element.

        :param url: address to fetch with an HTTP GET.
        :param timeout: seconds to wait for the server before ``requests``
            raises a timeout error. Without a timeout the request could block
            indefinitely on an unresponsive server.
        :return: one ``Paper.get_json()`` result per ``entry`` element found
            in the response body, in document order.
        """
        res = requests.get(url, timeout=timeout).content
        soup = BeautifulSoup(res, 'html.parser')
        papers = [Paper(entry=entry) for entry in soup.findAll('entry')]

        return [p.get_json() for p in papers]
예제 #7
0
    def setUp(self) -> None:
        """Load the raw config and the test papers, then derive the lookup tables.

        Builds ``self.papers`` (id -> Paper), ``self.author_papers``
        (author id -> list of paper ids, taken from each paper's affiliations)
        and ``self.id_to_name`` (author id -> name, taken from each paper's
        authors). Files are opened with context managers so no handle is left
        open after the fixture is built.
        """
        with open("config.json") as config_file:
            self.config_raw = json.load(config_file)
        self.config_raw["log path"] = "/tests/targetCreatorTests/logs/"
        self.config_raw["raise error"] = False
        self.config_raw["treat id different people"] = True
        self.config_raw["skip error papers"] = True
        self.log_path = self.config_raw["log path"]
        self.test_authors = [
            "hua-wu", "yun-chen", "victor-ok-li", "linfeng-song", "peng-li",
            "tatsuya-izuha", "yun-huang", "xuan-jing-huang", "qiang-wang"
        ]
        self.test_papers = [
            "W18-5212", "C18-1314", "P16-1159", "P17-1176", "W11-1911",
            "C14-1179", "P07-1089"
        ]
        # NOTE(review): "P17-1776" differs from the "P17-1176" listed in
        # test_papers above - confirm this is intentional.
        self.test_multiple_auth = ["P17-1776", "C14-1179"]
        self.test_non_parsed = ["S19-2016"]
        # config = ConfigHandler(self.config_raw,"setup_test_target_creator")
        # data = loadData([ "id_to_name", "author_papers"],config.logger,config)
        papers_path = os.getcwd() + "/tests/authorDisambiguationTests/test_papers.json"
        with open(papers_path) as papers_file:
            self.parsed_raw = json.load(papers_file)
        self.papers = {x: Paper(**v) for x, v in self.parsed_raw.items()}
        self.author_papers = {}
        self.id_to_name = {}
        for paper_id, paper in self.papers.items():
            # Every author id with an affiliation entry is linked to this paper.
            for author_id in paper.affiliations.keys():
                self.author_papers.setdefault(author_id, []).append(paper_id)
            for author_id, name in paper.authors.items():
                self.id_to_name[author_id] = name
예제 #8
0
    def test_when_pencil_write_is_passed_a_paper_instance_and_a_string_to_write_it_will_degrade_the_pencil_durability_and_write_the_resulting_string_on_the_paper(self):
        """Writing a sentence puts it on the paper and lowers the tip durability.

        The pencil is expected to start at 1000 and to be at 974 after writing.
        """
        sheet = Paper()
        sentence = "This is a string to be written."

        # Precondition: the tip has not been used yet.
        self.assertEqual(self.pencil.current_tip_durability, 1000)

        self.pencil.write(sheet, sentence)

        self.assertEqual(sheet.text, "This is a string to be written.")
        self.assertEqual(self.pencil.current_tip_durability, 974)
    def setUp(self):
        """Load the TEI test documents and the shared data files for the parser tests.

        Changes from the previous version:
        * every file is opened with a context manager, so no handle is leaked;
        * each TEI file is read once and the bytes reused for both the raw
          ``*_xml`` attribute and the parsed ``*_root`` attribute;
        * ``config.json`` is located relative to the working directory, like
          every other path in this fixture, instead of through a hard-coded
          home-directory path.
        """
        cwd = os.getcwd()
        self.log_path = cwd + '/tests/pdfParserTests/logs/'
        with open(cwd + "/config.json") as config_file:
            self.config = json.load(config_file)
        test_paper_path = cwd + "/tests/pdfParserTests/"
        data_path = cwd + "/data"

        def read_bytes(file_name):
            # Return the raw bytes of one TEI test document.
            with open(test_paper_path + file_name, "rb") as tei_file:
                return tei_file.read()

        def load_json(relative_path):
            # Parse one JSON file below the data directory.
            with open(data_path + relative_path) as json_file:
                return json.load(json_file)

        self.test_paper1_xml = read_bytes("test_1.tei.xml")
        self.test_paper1_root = etree.XML(self.test_paper1_xml)
        self.test1_key = "Q13-1004"
        self.test_paper2_xml = read_bytes("test_2.tei.xml")
        self.test_paper2_root = etree.XML(self.test_paper2_xml)
        self.test2_key = "W19-4450"
        self.test_paper3_root = etree.XML(read_bytes("test_3.tei.xml"))
        self.test_paper4_root = etree.XML(read_bytes("test_4.tei.xml"))

        self.aliases = load_json("/json/aliases.json")
        papers_tmp = load_json("/json/acl_papers.json")
        self.papers = {x: Paper(**v) for x, v in papers_tmp.items()}
        self.id_to_name = load_json("/json/id_to_name.json")
        with open(data_path + "/txt/same_names.txt") as names_file:
            self.same_names = [x.strip() for x in names_file.readlines()]
        self.parser_args = {
            "aliases": self.aliases,
            "id_to_name": self.id_to_name,
            "same_names": self.same_names,
            "sim_cutoff": .75
        }
        self.wrapper_args = {
            "aliases": self.aliases,
            "papers": self.papers,
            "id_to_name": self.id_to_name,
            "same_names": self.same_names
        }
        self.data_path = cwd + "/data/"
예제 #10
0
def weekly_papers(event, context):
    """Push a carousel of five randomly selected papers to ``USER_ID``.

    ``event`` and ``context`` are only printed. A ``LineBotApiError`` raised
    while pushing is printed and otherwise ignored. Returns ``None``.
    """
    print(event)
    print(context)

    linebot = LineBotApi(CHANNEL_ACCESS_TOKEN)

    # Wrap every search hit in a Paper first, then render each one.
    selected = []
    for hit in es.random_search(size=5):
        selected.append(Paper(json=hit))
    bubbles = []
    for paper in selected:
        bubbles.append(paper.get_flex_contents())

    carousel = {'type': 'carousel', 'contents': bubbles}
    message = FlexSendMessage(alt_text='Weekly Papers', contents=carousel)

    try:
        linebot.push_message(USER_ID, message)
    except LineBotApiError as e:
        print(e)
예제 #11
0
 def setUp(self) -> None:
     """Load the papers, config, corpora and lookup tables for the tests.

     Changes from the previous version:
     * every file is opened with a context manager, so no handle is leaked;
     * ``self.incomplete`` used to be computed twice, with the second
       (unfiltered) result overwriting the first; only that final
       computation is kept, so the resulting value is unchanged.
     """
     cwd = os.getcwd()
     data_path = cwd + "/data"

     def load_json(path):
         # Parse one JSON file and close it again.
         with open(path) as json_file:
             return json.load(json_file)

     def read_lines(path):
         # Return all lines of a text file, newlines included.
         with open(path) as text_file:
             return text_file.readlines()

     test_papers = load_json(
         cwd + "/tests/authorDisambiguationTests/test_papers.json")
     self.test_papers = {k: Paper(**p) for k, p in test_papers.items()}
     self.config = load_json(cwd + "/config.json")
     # Left empty on purpose: loading parsed_papers.json is disabled here.
     self.papers = {}
     self.author_papers = load_json(data_path + "/json/author_papers.json")
     self.log_path = cwd + '/tests/authorDisambiguationTests/logs/'
     # Each corpus line becomes a list of stemmed tokens.
     org_corpus = [[stemmer.stem(w) for w in x.strip().split()]
                   for x in read_lines(data_path + "/txt/org_corpus.txt")]
     department_corpus = [[stemmer.stem(w) for w in x.strip().split()]
                          for x in read_lines(data_path +
                                              "/txt/department_corpus.txt")]
     self.incomplete = [
         x.strip()
         for x in read_lines(data_path + "/txt/incomplete_papers.txt")
     ]
     self.compare_authors_args = {
         "company_corpus": org_corpus,
         "department_corpus": department_corpus,
         "threshold": .4,
         "str_algorithm": ["jaro", "similarity"]
     }
     self.id_to_name = load_json(data_path + "/json/id_to_name.json")
예제 #12
0
def webhook(event, context):
    """Answer each text message in a LINE webhook call with matching papers.

    The request body is expected to look like::

        {"events":[
          {"type":"message","replyToken":"a5d6dadb84a346428bc53ea9ce656cea",
           "message":{"type":"text","id":"13044610237128","text":"yo"}}
        ]}

    For every event carrying a reply token and a message text, the text is
    searched in the ``title`` and ``abstract`` fields and the hits are sent
    back as a carousel, or a "Results Not Found" text when there are none.
    Events without a reply token or message text are skipped; previously
    they raised ``KeyError`` and failed the whole invocation.

    :param event: invocation payload; ``event['body']`` is a JSON string.
    :param context: unused.
    :return: a dict with ``statusCode`` 200 and a JSON ``body``.
    """
    # NOTE(review): the request signature is not validated here (the
    # WebhookHandler(CHANNEL_SECRET) setup is disabled) - confirm that
    # verification happens elsewhere.
    linebot = LineBotApi(CHANNEL_ACCESS_TOKEN)

    events = json.loads(event['body'])['events']
    for e in events:
        reply_token = e.get('replyToken')
        text = (e.get('message') or {}).get('text')
        if reply_token is None or text is None:
            # Nothing to search for, or nothing to reply to.
            continue
        text = text.strip()

        papers = es.search(text, ['title', 'abstract'])
        papers = [Paper(json=p) for p in papers]

        if papers:
            contents = {'type': 'carousel', 'contents': [
                p.get_flex_contents() for p in papers]}

            linebot.reply_message(reply_token, FlexSendMessage(
                alt_text=f'papers for {text}',
                contents=contents
            ))
        else:
            linebot.reply_message(reply_token,
                                  TextSendMessage(text='Results Not Found'))

    response = {
        'statusCode': 200,
        'body': json.dumps({'message': 'ok'})
    }

    return response
예제 #13
0
 def test_when_print_text_is_called_it_prints_text_variable_to_stdout(
         self, mock_output):
     """print_text() emits the paper's text followed by a newline.

     ``mock_output`` is presumably the patched stdout stream injected by a
     decorator outside this view - verify against the decorator.
     """
     sheet = Paper()
     sheet.write("Hello World!")

     sheet.print_text()

     printed = mock_output.getvalue()
     self.assertEqual(printed, "Hello World!\n")
예제 #14
0
 def test_when_write_is_passed_string_it_adds_string_to_text_variable_in_the_paper_instance_with_existing_text(
         self):
     """write() appends to text that is already on the paper."""
     sheet = Paper()
     sheet.text = "Hello "

     sheet.write("World!")

     expected = "Hello World!"
     self.assertEqual(sheet.text, expected)
예제 #15
0
    def test_updatePapers(self):
        """Run _updatePapers through seven scenarios and check the results.

        After each call the sizes of ``new_papers`` and ``new_author_papers``
        are checked. Where an update is expected, the new author id must also
        appear in both ``authors`` and ``affiliations`` of the paper.
        """
        print("INFO: Testing updatePapers")
        # Opening in "w" mode creates the log file or empties an existing one.
        log_file = os.getcwd() + self.log_path + "update_papers.log"
        with open(log_file, "w"):
            pass
        config = ConfigHandler(self.config_raw, "update_papers")
        author_papers_copy = deepcopy(self.author_papers)
        papers_copy = {x: Paper(**v.asDict()) for x, v in self.papers.items()}
        cases = [
            ["qiang-wang", "qiang-wang1", None],  # No papers passed
            ['hua-wu', "hua-wu1", ['P16-1159']],  # Error papers
            ['yun-chen', "yun-chen1", ['P16-1159']],  # Not in paper
            ['yun-chen', "yun-chen1", ['P17-1176']],
            ['victor-ok-li', "victor-ok-li1",
             ['P17-1176']],  # Paper already done
            ["xuan-jing-huang", "fail-test", ["P19-1642"]],
            ['fail-test', "yun-huang1", ['S19-2016']],
        ]

        target_creator = TargetCreator(papers_copy, self.id_to_name,
                                       author_papers_copy,
                                       **config["TargetCreator"])
        target_creator.one_per_paper = False
        target_creator.error_papers = {"P16-1159"}

        def check_sizes(paper_count, author_count):
            # Sizes of the two result mappings after the latest call.
            self.assertEqual(paper_count, len(target_creator.new_papers))
            self.assertEqual(author_count,
                             len(target_creator.new_author_papers))

        def check_author_on_paper(author_id, paper_id):
            # The new id must be listed as author and have an affiliation.
            self.assertTrue(
                author_id in target_creator.new_papers[paper_id].authors)
            self.assertTrue(
                author_id in target_creator.new_papers[paper_id].affiliations)

        # No papers passed.
        target_creator._updatePapers(*cases[0])
        check_sizes(1, 1)
        self.assertTrue("qiang-wang1" in target_creator.new_author_papers)
        self.assertTrue("W19-4416" in target_creator.new_papers)
        check_author_on_paper("qiang-wang1", "W19-4416")

        # Error papers: sizes stay the same.
        target_creator._updatePapers(*cases[1])
        check_sizes(1, 1)

        # Not in paper: sizes stay the same.
        target_creator._updatePapers(*cases[2])
        check_sizes(1, 1)

        target_creator._updatePapers(*cases[3])
        check_sizes(2, 2)
        check_author_on_paper("qiang-wang1", "W19-4416")
        check_author_on_paper("yun-chen1", "P17-1176")

        # Paper already done: one more author, same number of papers.
        target_creator._updatePapers(*cases[4])
        check_sizes(2, 3)
        check_author_on_paper("yun-chen1", "P17-1176")
        check_author_on_paper("victor-ok-li1", "P17-1176")

        target_creator._updatePapers(*cases[5])
        check_sizes(2, 3)

        target_creator._updatePapers(*cases[6])
        check_sizes(2, 3)
예제 #16
0
    def test_when_pencil_write_at_is_passed_a_string_and_an_index_less_than_zero_raises_index_error(self):
        """write_at with a negative index must raise IndexError."""
        sheet = Paper()
        sheet.text = "An apple a day keeps the doctor away"

        with self.assertRaises(IndexError):
            self.pencil.write_at(sheet, "Remember, a", -10)
예제 #17
0
 def setUp(self):
     """Create a blank paper and a pencil, and record the pencil's starting point durability."""
     self.paper = Paper()
     new_pencil = pencil_factory.get_no2_hb()
     self.pencil = new_pencil
     # Kept so tests can compare against the value before any writing.
     self.initial_point_durability = new_pencil.point_durability
예제 #18
0
    def test_paper_should_initialize_last_erased_field(self):
        """A newly created Paper reports -1 as its last_erased value."""
        fresh_sheet = Paper()
        expected_last_erased = -1

        self.assertEqual(fresh_sheet.last_erased, expected_last_erased)
예제 #19
0
 def setUp(self):
     """Give every test a blank paper and an eraser with durability 1000."""
     blank_sheet = Paper()
     full_eraser = Eraser(durability=1000)
     self.paper = blank_sheet
     self.eraser = full_eraser
예제 #20
0
 def setUp(self):
     """Create a blank paper and an eraser, remembering the eraser's starting durability."""
     starting_durability = 1000
     self.initial_eraser_durability = starting_durability
     self.paper = Paper()
     # The eraser is built from the stored attribute, as tests compare against it.
     self.eraser = Eraser(durability=self.initial_eraser_durability)
예제 #21
0
    def test_paper_text_should_be_set_with_text_property(self):
        """Assigning to Paper.text makes the same value readable from it."""
        sheet = Paper()
        sample = 'abc'

        sheet.text = sample

        self.assertEqual(sheet.text, 'abc')
예제 #22
0
    def test_can_initialize_paper_with_text(self):
        """The initial_text constructor argument becomes the paper's text."""
        greeting = 'Hello Fellow'
        sheet = Paper(initial_text=greeting)

        self.assertEqual(sheet.text, 'Hello Fellow')
예제 #23
0
    def setUp(self):
        """Load config, parsed papers and author fixtures for the pair-creation tests.

        Builds:
        * ``self.test_papers``: paper id -> {author id: author name};
        * ``self.test_keys``: "<paper id> <author id>" strings for every
          author in ``self.test_papers`` except "yang-feng";
        * ``self.papers``: every parsed paper as a ``Paper``;
        * ``self.short_papers``: only the papers that are in
          ``self.test_papers`` or listed as incomplete.

        Changes from the previous version: files are opened with context
        managers so no handle is leaked, and the incomplete-paper membership
        test uses a set built once instead of scanning the list per paper.
        """
        with open("/home/gabe/Desktop/research-main/config.json") as config_file:
            self.config = json.load(config_file)
        data_path = "/home/gabe/Desktop/research-main/data"
        with open(data_path + "/json/parsed_papers.json") as papers_file:
            papers_dict = json.load(papers_file)
        with open(
                "/home/gabe/Desktop/research-main/tests/createPairTests/test_papers.json"
        ) as auth_info_file:
            self.test_auth_info = json.load(auth_info_file)
        with open(data_path + "/txt/incomplete_papers.txt") as incomplete_file:
            self.incomplete = [x.strip() for x in incomplete_file.readlines()]
        self.test_papers = {
            "N12-1057": {
                "owen-rambow": "Owen Rambow",
                "mona-diab": "Mona Diab",
                "vinodkumar-prabhakaran": "Vinodkumar Prabhakaran"
            },
            "N19-1050": {
                "shima-asaadi": "Shima Asaadi",
                "saif-mohammad": "Saif Mohammad",
                "svetlana-kiritchenko": "Svetlana Kiritchenko"
            },
            "C16-1050": {
                "elaheh-shafieibavani": "Elaheh ShafieiBavani",
                "mohammad-ebrahimi": "Mohammad Ebrahimi",
                "raymond-wong": "Raymond Wong",
                "fang-chen": "Fang Chen"
            },
            "S19-2016": {
                "tobias-putz": "Tobias P\u00fctz",
                "kevin-glocker": "Kevin Glocker"
            },
            "P19-1642": {
                "iacer-calixto": "Iacer Calixto",
                "miguel-rios": "Miguel Rios",
                "wilker-aziz": "Wilker Aziz"
            },
            "W19-4022": {
                "jungyeul-park": "Jungyeul Park",
                "francis-tyers": "Francis Tyers"
            },
            "Q19-1001": {
                "dan-roth": "Dan Roth",
                "alla-rozovskaya": "Alla Rozovskaya"
            },
            "P15-1150": {
                "christopher-d-manning": "Christopher D. Manning",
                "kai-sheng-tai": "Kai Sheng Tai",
                "richard-socher": "Richard Socher"
            },
            'P17-1139': {
                'yang-liu-ict': 'Yang Liu',
                'maosong-sun': 'Maosong Sun',
                'jiacheng-zhang': 'Jiacheng Zhang',
                'huanbo-luan': 'Huanbo Luan',
                'jingfang-xu': 'Jingfang Xu'
            },
            'C10-2136': {
                'yang-liu-ict': 'Yang Liu',
                'yajuan-lu': 'Yajuan Lv',
                'qun-liu': 'Qun Liu',
                'jinsong-su': 'Jinsong Su',
                'haitao-mi': 'Haitao Mi',
                'hongmei-zhao': 'Hongmei Zhao'
            },
            'D18-1041': {
                'yang-liu-ict': 'Yang Liu',
                'jinsong-su': 'Jinsong Su',
                'jiali-zeng': 'Jiali Zeng',
                'huating-wen': 'Huating Wen',
                'jun-xie': 'Jun Xie',
                'yongjing-yin': 'Yongjing Yin',
                'jianqiang-zhao': 'Jianqiang Zhao'
            },
            'Q18-1029': {
                'yang-liu-ict': 'Yang Liu',
                'zhaopeng-tu': 'Zhaopeng Tu',
                'shuming-shi': 'Shuming Shi',
                'tong-zhang': 'Tong Zhang'
            },
            'P17-1176': {
                'yang-liu-ict': 'Yang Liu',
                'victor-ok-li': 'Victor O.K. Li',
                'yun-chen': 'Yun Chen',
                'yong-cheng': 'Yong Cheng'
            },
            'P09-1065': {
                'yang-liu-ict': 'Yang Liu',
                'qun-liu': 'Qun Liu',
                'haitao-mi': 'Haitao Mi',
                'yang-feng': 'Yang Feng'
            },
            'P13-1084': {
                'yang-liu-ict': 'Yang Liu',
                'jun-zhao': 'Jun Zhao',
                'guangyou-zhou': 'Guangyou Zhou',
                'shizhu-he': 'Shizhu He',
                'fang-liu': 'Fang Liu'
            }
        }
        self.test_keys = []
        for k, info in self.test_papers.items():
            for a in info.keys():
                if a == "yang-feng":
                    continue
                self.test_keys.append(k + " " + a)

        self.papers = {}
        self.short_papers = {}
        # Built once so the membership test below is O(1) per paper.
        incomplete_ids = set(self.incomplete)
        for k, info in papers_dict.items():
            if k in self.test_papers or k in incomplete_ids:
                self.short_papers[k] = Paper(**info)
            self.papers[k] = Paper(**info)
            # Diagnostic output: print the first paper-level hit where an
            # affiliation "type" entry has more than two items.
            for a, aff_info in info["affiliations"].items():
                if a == "yang-feng":
                    continue
                if "type" in aff_info and len(aff_info["type"]) > 2:
                    print(k)
                    print(info)
                    break

        self.default_args = dict(author_cutoff=0, drop_null_authors=False)
        self.log_path = os.getcwd() + '/createPairTests/logs/'
예제 #24
0
    def test_should_instantiate_paper_with_empty_text_field(self):
        """Paper() yields a Paper instance whose text is the empty string."""
        blank_sheet = Paper()

        self.assertIsInstance(blank_sheet, Paper)
        self.assertEqual(blank_sheet.text, '')
예제 #25
0
 print("INFO: Starting Create Data")
 gc.collect()
 config_raw = json.load(open("config.json"))
 config = ConfigHandler(config_raw,
                        "disambiguate",
                        raise_error_unknown=True)
 data = loadData([
     "department_corpus", "incomplete_papers", "org_corpus", "conflicts",
     "parsed_papers", "same_names", "test_special_keys", "author_papers",
     "id_to_name"
 ], config.logger, config)
 author_papers = data["author_papers"]
 id_to_name = data["id_to_name"]
 same_names = data["same_names"]
 parsed = data["parsed_papers"]
 parsed = {x: Paper(**info) for x, info in parsed.items()}
 org_corpus = data["org_corpus"]
 department_corpus = data["department_corpus"]
 incomplete = data["incomplete_papers"]
 special_keys = data["test_special_keys"]
 input_handler = InputHandler(parsed, author_papers, id_to_name,
                              **config["InputHandler"])
 # input_handler.handleUserInput()
 input_handler.targets = [
     "francisco-m-couto1",
     "qin-lu1",
     "manuel-carlos-diaz-galiano1",
     "luis-nieto-pina1",
     "yang-liu",
     "luciano-del-corro1",
     "izzeddin-gur1",
예제 #26
0
 def setUp(self):
     """Give every test a pencil from pencil_factory.get_no2_hb() and a blank paper."""
     new_pencil = pencil_factory.get_no2_hb()
     self.pencil = new_pencil
     blank_sheet = Paper()
     self.paper = blank_sheet
    def __init__(self, papers=None, author_papers=None, compare_args=None, id_to_name=None,
                 console_log_level=logging.ERROR, file_log_level=logging.DEBUG, log_format=None, log_path=None,
                 save_data=False, ext_directory=False, save_path=None, threshold=.2, name_similarity_cutoff=.92,
                 str_algorithm="jaro-similarity", model=None, model_name="VC1", model_path=None,
                 create_new_author=False, compare_cutoff=3, tie_breaker="max", cores=4, DEBUG_MODE=False,
                 sim_overrides=False, allow_authors_not_in_override=True, same_paper_diff_people=True, use_probabilities=False):
        """Create the logger, load the model, validate the data and store the options.

        :param papers: dict of paper id -> ``Paper`` or dict of ``Paper``
            keyword arguments. When falsy, ``parsed_papers.json`` is located
            with ``self._findData``.
        :param author_papers: dict, deep-copied. When falsy,
            ``author_papers.json`` is located with ``self._findData``.
        :param compare_args: dict; required unless ``DEBUG_MODE`` is set.
        :param id_to_name: dict of author id -> name data passed to
            ``nameFromDict``. When falsy, ``id_to_name.json`` is located with
            ``self._findData``.
        :param log_format: logging format; a default is used when falsy.
        :param log_path: log file; defaults to ``<cwd>/logs/disambiguation.log``.
        :param model: model object; when ``None`` it is unpickled from
            ``<model_path>/models/<model_name>/model.pickle`` (``model_path``
            defaults to the working directory).
        :param str_algorithm: ``"<algorithm>-<measure>"``, split on ``-`` and
            resolved with ``getAlgo``.
        :param use_probabilities: forced to ``False`` when the model uses
            hard voting or its ``voting`` attribute cannot be inspected.
        :param DEBUG_MODE: skip all validation and substitute empty dicts for
            missing data.
        :raises TypeError: a provided data argument is not a dict.
        :raises ValueError: required data is missing, empty or cannot be found.

        The remaining arguments are stored on the instance unchanged
        (``save_path`` is stored as ``save_dir``).

        Fixes in this version:
        * passed ``papers`` are inspected by their first *value*; the old
          check looked at the first key, which can never be a dict, so paper
          dicts were never converted to ``Paper`` objects;
        * the ``id_to_name`` lookup failure message names ``id_to_name.json``
          instead of ``parsed_papers.json``;
        * the model pickle file is closed after loading.
        """
        if not log_format:
            log_format = '%(asctime)s|%(levelname)8s|%(module)20s|%(funcName)20s: %(message)s'
        if not log_path:
            log_path = os.getcwd() + "/logs/disambiguation.log"
        self.logger = createLogger("author_disambiguation", log_path, log_format, console_log_level, file_log_level)
        self.console_log_level = console_log_level
        self.model = model
        self.model_name = model_name
        if self.model is None:
            if not model_path:
                model_path = os.getcwd()
            # NOTE(review): unpickling executes code stored in the file, so
            # model_path must only ever point at trusted model files.
            with open("{}/models/{}/model.pickle".format(model_path, model_name), "rb") as model_file:
                self.model = pickle.load(model_file)
        try:
            if self.model.voting == "hard" and use_probabilities:
                self.logger.warning("hard voting does not support probabilities")
                self.use_probabilities = False
            else:
                self.use_probabilities = use_probabilities
        except Exception:
            # Best effort: a model whose voting mode cannot be inspected is
            # treated as not supporting probabilities.
            self.logger.debug("model does not have voting")
            self.use_probabilities = False

        if not DEBUG_MODE:
            # Argument validation
            if compare_args and not isinstance(compare_args, dict):
                self.logger.error("passed compare_args is not valid")
                self.logger.exception(TypeError("compare_args is not a dict"))
                raise TypeError("compare_args is not a dict")
            elif not compare_args:
                self.logger.error("passed compare_args is not valid")
                self.logger.exception(ValueError("compare_args is None"))
                raise ValueError("compare_args is None")
            else:
                self.compare_args = compare_args

            # defaultdict is a dict subclass, so one isinstance check covers both.
            if author_papers and not isinstance(author_papers, dict):
                self.logger.error("passed author_papers is not valid")
                self.logger.error("type is {}".format(type(author_papers)))
                self.logger.exception(TypeError("author_papers is not a dict"))
                raise TypeError("author_papers is not a dict")
            elif not author_papers:
                author_papers, status, error_msg = self._findData("author_papers.json")
                if status != 0:
                    self.logger.error(
                        "passed author_papers is not valid and could not find the file author_papers.json")
                    self.logger.error("self._findData(\"author_papers.json\") returned error {}".format(error_msg))
                    self.logger.exception(ValueError("No valid author_papers found"))
                    raise ValueError("No valid author_papers found")
                else:
                    self.author_papers = deepcopy(author_papers)
            else:
                self.author_papers = deepcopy(author_papers)

            if papers and not isinstance(papers, dict):
                self.logger.error("passed papers is not valid")
                self.logger.exception(TypeError("papers is not a dict"))
                raise TypeError("papers is not a dict")
            elif not papers:
                papers, status, error_msg = self._findData("parsed_papers.json")
                if status != 0:
                    self.logger.error("passed papers is not valid and could not find the file parsed_papers.json")
                    self.logger.error("self._findData(\"parsed_papers.json\") returned error {}".format(error_msg))
                    self.logger.exception(ValueError("No valid parsed_papers found"))
                    raise ValueError("No valid parsed_papers found")
                else:
                    if len(papers) == 0:
                        self.logger.exception(ValueError("Found papers is empty"))
                        raise ValueError("Found papers is empty")
                    self.logger.debug("Converting papers from dict to Paper object")
                    self.papers = {}
                    for k, info in papers.items():
                        self.papers[k] = Paper(**info)

            else:
                if len(papers) == 0:
                    self.logger.exception(ValueError("Passed papers is empty"))
                    raise ValueError("Passed papers is empty")
                # Use the first value to decide whether the caller passed raw
                # dicts that still need converting or ready-made objects.
                first_paper = next(iter(papers.values()))
                if isinstance(first_paper, dict):
                    self.papers = {}
                    for k, info in papers.items():
                        try:
                            self.papers[k] = Paper(**info)
                        except Exception as e:
                            self.logger.error("Exception raised when converting paper dicts to Paper")
                            self.logger.error("k={}".format(k))
                            self.logger.error("info={}".format(info))
                            self.logger.exception(e)
                            raise e
                else:
                    self.papers = papers

            if id_to_name and not isinstance(id_to_name, dict):
                self.logger.error("passed id_to_name is not valid")
                self.logger.exception(TypeError("id_to_name is not a dict"))
                raise TypeError("id_to_name is not a dict")
            elif not id_to_name:
                id_to_name, status, error_msg = self._findData("id_to_name.json")
                if status != 0:
                    self.logger.error("passed id_to_name is not valid and could not find the file id_to_name.json")
                    self.logger.error("self._findData(\"id_to_name.json\") returned error {}".format(error_msg))
                    self.logger.exception(ValueError("No valid id_to_name found"))
                    raise ValueError("No valid id_to_name found")
                else:
                    if len(id_to_name) == 0:
                        self.logger.exception(ValueError("Found id_to_name is empty"))
                        raise ValueError("Found id_to_name is empty")
                    self.id_to_name = id_to_name

            else:
                if len(id_to_name) == 0:
                    self.logger.exception(ValueError("Passed id_to_name is empty"))
                    raise ValueError("Passed id_to_name is empty")
                self.id_to_name = id_to_name
        else:
            printLogToConsole(self.console_log_level, "RUNNING IN DEBUG_MODE!", logging.WARNING)
            self.logger.warning("Running in DEBUG_MODE")
            self.id_to_name = id_to_name if id_to_name else {}
            self.papers = papers if papers else {}
            self.compare_args = compare_args if compare_args else {}
            self.author_papers = author_papers if author_papers else {}
        self.compare_terms = len(CompareAuthors.compare_terms)
        self.save_data = save_data
        self.save_dir = save_path
        self.ext_directory = ext_directory
        self.threshold = threshold
        self.name_similarity_cutoff = name_similarity_cutoff
        algo_name, measure = str_algorithm.split("-")
        self.author_name = {author_id: nameFromDict(name_info) for author_id, name_info in self.id_to_name.items()}
        self.cores = cores
        self.str_algorithm = getAlgo(algo_name, measure)
        self.create_new_author = create_new_author
        self.compare_cutoff = compare_cutoff
        self.tie_breaker = tie_breaker
        self.sim_overrides = sim_overrides
        self.allow_authors_not_in_override = allow_authors_not_in_override
        self.same_paper_diff_people = same_paper_diff_people
        self.logger.debug("AuthorDisambiguation initialized with arguments:")
        self.logger.debug("\tcompare_args={}".format(list(self.compare_args.keys())))
        self.logger.debug("\talgorithm={}".format(algo_name))
        self.logger.debug("\tmeasure={}".format(measure))
        self.logger.debug("\tthreshold={}".format(threshold))
        self.logger.debug("\tname_similarity_cutoff={}".format(name_similarity_cutoff))
        self.logger.debug("\tunique authors={}".format(len(self.author_papers)))
        self.logger.debug("\tcompare_cutoff={}".format(self.compare_cutoff))
        self.logger.debug("\ttie_breaker={}".format(self.tie_breaker))
        self.logger.debug("\tsim_overrides={}".format(self.sim_overrides))
        self.logger.debug("\tsame_paper_diff_people={}".format(self.same_paper_diff_people))
        self.logger.debug("\tuse_probabilities={}".format(self.use_probabilities))
        if self.compare_cutoff != 3:
            self.logger.warning("Non-default value for compare_cutoff, currently this is not implemented")