class ValidFakeDatabase:
    """Fake database whose attributes hold hard-coded dummy records to return."""

    def __init__(self):
        """Build the canned dates, papers, authors, tags, publishers and user."""
        self.dateA = datetime.datetime(2013, 8, 4, 12, 30, 45)
        self.dateB = datetime.datetime(1999, 7, 6, 12, 30, 45)

        # Each factory returns a brand-new list on every call, so no two
        # records end up sharing one mutable list object.
        def all_topics():
            return ["Genetics", "Bioinformatics", "Search Engines",
                    "Artificial Intelligence"]

        def references():
            return ["ref1", "ref2", "ref3"]

        def citation_links():
            return ["citation link 1", "citation link 2", "citation link 2"]

        def shakespeare_trio():
            return ["Andrew Davidson", "William Shakespeare", "Edsger Dijkstra"]

        def turing_trio():
            return ["Alan Turing", "Shia Leboeuf", "Andrew Davidson"]

        def coauthor_groups():
            return [shakespeare_trio(), turing_trio()]

        def date_pair():
            return [self.dateB, self.dateA]

        def bacon_and_hunger():
            return ["The Health Benefits of the All-Bacon Diet",
                    "The Dangers of Coding While Hungry"]

        def pizza_and_hunger():
            return ["The Struggles of Eating a Giordano's Pizza Alone",
                    "The Dangers of Coding While Hungry"]

        # --- papers ---
        self.paperA = Paper(
            "12345",
            "The Health Benefits of the All-Bacon Diet",
            ["4445", "666", "123"],
            all_topics(),
            "Bacon is actually one of the healthiest foods of all time. This is an abstract! For the full article, download the PDF.",
            "1234",
            self.dateA,
            datetime.datetime.now(),
            "1111",
            references(),
            "14000",
            citation_links(),
            "Your Favorite Publisher",
            turing_trio(),
        )
        self.paperB = Paper(
            "90210",
            "The Dangers of Coding While Hungry",
            ["12068", "7797", "4326"],
            all_topics(),
            " Abstracts never seem to be simple or contain useful information.",
            "444",
            self.dateA,
            datetime.datetime.now(),
            "6677",
            references(),
            "14000",
            citation_links(),
            "Your Favorite Publisher",
            shakespeare_trio(),
        )
        self.paperC = Paper(
            "666",
            "The Struggles of Eating a Giordano's Pizza Alone",
            ["567", "2213", "989"],
            ["6237", "3177", "432"],
            "Abstracts are the SparkNotes of the academic world.",
            "12534434",
            self.dateB,
            datetime.datetime.now(),
            "2345",
            references(),
            "14000",
            citation_links(),
            "Prentice Hall",
            shakespeare_trio(),
        )

        # --- authors ---
        self.authorA = Author("55555", "Shia Leboeuf", "4444", ["0", "1"],
                              bacon_and_hunger(), coauthor_groups(), date_pair())
        self.authorB = Author("43216", "Andrew Davidson", "1", ["0", "1"],
                              bacon_and_hunger(), coauthor_groups(), date_pair())
        self.authorC = Author("6542", "William Shakespeare", "11542", ["2", "1"],
                              pizza_and_hunger(), coauthor_groups(), date_pair())
        self.authorD = Author("64632", "Edsger Dijkstra", "147", ["2", "1"],
                              pizza_and_hunger(), coauthor_groups(), date_pair())
        self.authorE = Author("63421", "Alan Turing", "40000", ["2", "1"],
                              pizza_and_hunger(), coauthor_groups(), date_pair())

        # --- tags ---
        self.tagA = Tag("Genetics", "40000", ["0", "1"])
        self.tagB = Tag("Bioinformatics", "12345", ["0", "1"])
        self.tagC = Tag("Search Engines", "5555", ["2", "3"])
        self.tagD = Tag("Artificial Intelligence", "42", ["2", "3"])

        # --- publishers ---
        self.publisherA = Publisher("1233", "Your Favorite Publisher", 0)
        self.publisherB = Publisher("3468", "Prentice Hall", 0)
        self.publisherC = Publisher("8372", "Rose-Hulman", 0)

        # --- user ---
        self.userA = User(
            "0",
            "Otis Redding",
            ["1", "3"],
            ["Andrew Davidson", "Jonathan Jenkins"],
            [self.paperA, self.paperB, self.paperC],
            [self.authorA, self.authorB, self.authorC],
            [self.tagA, self.tagC, self.tagB, self.tagD],
            "45",
            "005792830123",
        )
def test_that_pencil_that_starts_dull_only_writes_empty_spaces(self):
    """A pencil built with zero point durability should leave only blanks."""
    dull_pencil = Pencil(point_durability=0)
    sheet = Paper()
    dull_pencil.write(sheet, 'test')
    # The page is expected to hold 4 spaces.
    # NOTE(review): the literal below holds a single space; the whitespace run
    # may have been collapsed in transit — confirm against the original file.
    self.assertEqual(' ', sheet.display_page())
def getPaper(self, paperID):
    """Load the paper stored under ``paperID`` from Redis and build a ``Paper``.

    Every field is read from a key of the form ``Paper:<paperID>:<Field>``.
    Author names are looked up under ``Author:<authorID>:Name`` and the
    publisher name comes from ``self.getPublisher``.

    :param paperID: identifier used to build the Redis keys (concatenated as a
        string).
    :return: a ``Paper`` instance, or ``None`` when no title is stored for
        ``paperID``.
    """
    prefix = "Paper:" + paperID + ":"
    title = self.redisDB.get(prefix + "Title")
    if title is None:
        # No title key means the paper does not exist.
        return None

    authorIDs = list(self.redisDB.smembers(prefix + "Authors"))
    tags = list(self.redisDB.smembers(prefix + "Tags"))
    abstract = self.redisDB.get(prefix + "Abstract")
    publisherID = self.redisDB.get(prefix + "PublisherID")
    viewCount = self.redisDB.get(prefix + "ViewCount")
    datePosted = datetime.strptime(self.redisDB.get(prefix + "DatePosted"),
                                   "%Y-%m-%d %H:%M:%S.%f")

    # TODO: fix this later
    datePublished = self.redisDB.get(prefix + "DatePublished")
    # Single-argument print with %-formatting produces the same output under
    # both the Python 2 print statement and the Python 3 print function.
    print("datePublished for paper %s %s : %s" % (paperID, title, datePublished))
    # Only the leading YYYY-MM-DD characters of the stored value are used.
    datePublished = date(int(datePublished[0:4]),
                         int(datePublished[5:7]),
                         int(datePublished[8:10]))

    postedByUserID = self.redisDB.get(prefix + "PostedByUserID")
    references = list(self.redisDB.smembers(prefix + "References"))
    citedBys = list(self.redisDB.smembers(prefix + "CitedBys"))
    # The flag is stored as text; only the exact string 'True' counts as set.
    isUploaded = self.redisDB.get(prefix + "IsUploaded") == 'True'

    authorNames = [self.redisDB.get("Author:" + authorID + ":Name")
                   for authorID in authorIDs]

    publisherGuy = self.getPublisher(publisherID)
    if publisherGuy is None:
        publisherName = "No Publisher Name"
    else:
        publisherName = publisherGuy.name

    return Paper(paperID, title, authorIDs, tags, abstract, publisherID,
                 datePublished, datePosted, postedByUserID, references,
                 viewCount, citedBys, publisherName, authorNames, isUploaded)
def setUp(self):
    """Give each test an eraser, a pencil fitted with that eraser, and a blank sheet."""
    self.eraser = Eraser(durability=10)
    self.pencil = Pencil(point_durability=100,
                         initial_length=5,
                         eraser=self.eraser)
    self.paper = Paper()
def test_that_pencil_not_dull_when_upper_case_text_should_not_use_up_point_durability(
        self):
    """Writing 'TEST' with 10 points of durability is expected to leave 2."""
    sharp_pencil = Pencil(point_durability=10)
    sheet = Paper()
    sharp_pencil.write(sheet, 'TEST')
    self.assertEqual(2, sharp_pencil.point_durability)
def read_csv_file(self, csv_name):
    """Create one ``Paper`` per line of ``csv_name`` and register it.

    The value handed to ``Paper`` is whatever follows the last '/' on the
    line, cut at the first newline. Each paper is stored in
    ``self.papers_dict`` under its ``pmid`` and that ``pmid`` is appended to
    ``self.csv_papers``.

    :param csv_name: path of the file to read, one entry per line.
    """
    with open(csv_name) as handle:
        for line in handle:
            tail = line.rsplit('/', 1)[-1]
            identifier = tail.partition('\n')[0]
            entry = Paper(identifier)
            self.papers_dict[entry.pmid] = entry
            self.csv_papers.append(entry.pmid)
def test_that_pencil_not_dull_when_lower_case_text_len_is_less_than_point_durability(
        self):
    """Writing 'test' with 10 points of durability is expected to leave 6."""
    sharp_pencil = Pencil(point_durability=10)
    sheet = Paper()
    sharp_pencil.write(sheet, 'test')
    self.assertEqual(6, sharp_pencil.point_durability)
def test_that_pencil_point_durability_decrements_by_one_for_special_characters(
        self):
    """Writing 29 special characters with 30 points is expected to leave 1."""
    specials = '~!@#$%^&*()_+`-=:",./<>?{}[]|'  # 29 characters
    sharp_pencil = Pencil(point_durability=30)
    sheet = Paper()
    sharp_pencil.write(sheet, specials)
    self.assertEqual(1, sharp_pencil.point_durability)
def test_that_pencil_point_durability_does_not_change_for_newline_characters(
        self):
    """Newlines mixed into the text should not cost any point durability."""
    sharp_pencil = Pencil(point_durability=10)
    sheet = Paper()
    # Two copies of 'test' interleaved with newline characters; the expected
    # remainder of 2 accounts for the eight letters only.
    sharp_pencil.write(sheet, '\ntest\ntest\n\n\n')
    self.assertEqual(2, sharp_pencil.point_durability)
def test_that_pencil_writes_where_it_left_off(self):
    """A second write should append to the text already on the page."""
    sheet = Paper()
    sharp_pencil = Pencil(point_durability=50)
    for fragment in ('She sells sea shells', ' down by the sea shore'):
        sharp_pencil.write(sheet, fragment)
    self.assertEqual('She sells sea shells down by the sea shore',
                     sheet.display_page())
def test_that_pencil_that_becomes_dull_during_writing_writes_empty_spaces(
        self):
    """Once the point runs out mid-word, the remaining characters become spaces."""
    fading_pencil = Pencil(point_durability=4)
    sheet = Paper()
    fading_pencil.write(sheet, 'Test')
    # The page should end with exactly one space.
    self.assertEqual('Tes ', sheet.display_page())
def test_that_eraser_erases_word_in_opposite_order(self):
    """An eraser with 3 durability should only remove the tail end of 'Bill'."""
    small_eraser = Eraser(durability=3)
    sharp_pencil = Pencil(point_durability=100,
                          initial_length=5,
                          eraser=small_eraser)
    sheet = Paper()
    sharp_pencil.write(sheet, 'Buffalo Bill')
    sharp_pencil.erase(sheet, 'Bill')
    # NOTE(review): whitespace runs inside the expected literal may have been
    # collapsed in transit — confirm against the original file.
    self.assertEqual('Buffalo B ', sheet.display_page())
def test_that_pencil_that_becomes_dull_returns_to_start_point_durability_when_sharpened(
        self):
    """Sharpening a fully worn pencil should restore its starting durability."""
    starting_durability = 15
    worn_pencil = Pencil(point_durability=starting_durability, initial_length=5)
    sheet = Paper()
    worn_pencil.write(sheet, 'Test sharpening')
    self.assertEqual(0, worn_pencil.point_durability)
    worn_pencil.sharpen()
    self.assertEqual(15, worn_pencil.point_durability)
def test_that_pencil_that_is_sharpened_before_going_dull_continues_to_write(
        self):
    """Sharpening between two writes should let both sentences appear in full."""
    sharp_pencil = Pencil(point_durability=20, initial_length=5)
    sheet = Paper()
    sharp_pencil.write(sheet, 'Testing sharpening.')
    sharp_pencil.sharpen()
    sharp_pencil.write(sheet, ' Testing sharpening.')
    expected_page = 'Testing sharpening. Testing sharpening.'
    self.assertEqual(expected_page, sheet.display_page())
def test_that_pencil_that_becomes_dull_picks_up_after_spaces_when_sharpened(
        self):
    """After going dull and being sharpened, new text should follow the blanks."""
    worn_pencil = Pencil(point_durability=5, initial_length=5)
    sheet = Paper()
    worn_pencil.write(sheet, 'Test sharpening')
    self.assertEqual(0, worn_pencil.point_durability)
    worn_pencil.sharpen()
    worn_pencil.write(sheet, 'Test sharpening')
    # NOTE(review): whitespace runs inside the expected literal may have been
    # collapsed in transit — confirm against the original file.
    self.assertEqual('Test Test ', sheet.display_page())
def create_paper(self, field):
    '''
    Creates a paper in the given field.

    The new paper is added to this agent with the flag ``True`` and to every
    chosen co-author with the flag ``False``; each co-author is also recorded
    under the 'agent' key of ``self.day_actions``.

    :param field: string field of new paper
    :return: tuple with the paper object and other authors
        NOTE(review): no return statement is visible in this block, so as
        shown it returns None — confirm whether a
        ``return paper, other_authors`` was lost.
    '''
    paper = Paper(field, self.id)
    self.add_paper(paper.id, True, field, paper)
    other_authors = self.choose_authors(self.number_of_coauthors(), field)
    for author in other_authors:
        # Look the co-author up by key in the simulation and attach the paper.
        self.simulation.dictionary[author].add_paper(
            paper.id, False, field, paper)
        # Create the 'agent' list on first use, append to it afterwards.
        if 'agent' in self.day_actions:
            self.day_actions['agent'].append(author)
        else:
            self.day_actions['agent'] = [author]
def recursion_search_citations(self, paper_pmid, k):
    """
    Recursively collect the papers that cited the original paper.

    For every pmid listed in the original paper's ``pm_cited``, a ``Paper`` is
    created if it is not yet in ``self.papers_dict``; either way the citing
    paper gets ``paper_pmid`` recorded through ``add_to_pm_cite`` and the
    search continues from it with one less level of depth.

    :param paper_pmid: the original paper pmid (must be a key of
        ``self.papers_dict``, otherwise ``KeyError`` is raised)
    :param k: the number of recursion iterations
    :return: None (all papers are added to self.papers_dict)
    """
    if k == 0:
        return
    original_paper = self.papers_dict[paper_pmid]
    # Identity checks: ``== None`` would defer to a custom ``__eq__`` on the
    # paper or on the ``pm_cited`` container and could give a wrong answer.
    if original_paper is None or original_paper.pm_cited is None:
        return
    for new_paper_pmid in original_paper.pm_cited:
        if new_paper_pmid not in self.papers_dict:
            new_paper = Paper(new_paper_pmid)
            new_paper.add_to_pm_cite(paper_pmid)
            self.papers_dict[new_paper.pmid] = new_paper
        else:
            self.papers_dict[new_paper_pmid].add_to_pm_cite(paper_pmid)
        self.recursion_search_citations(new_paper_pmid, k - 1)
def f():
    """Read dots and fold instructions from 'input.txt', fold, and print results.

    The file holds one ``x,y`` dot per line, then a blank line, then one fold
    instruction per line whose text before '=' ends with the axis letter and
    whose text after '=' is the fold position.

    Part I prints the dot count after the first fold; Part II applies the
    remaining folds and prints the resulting paper.

    :return: 0
    """
    dots = set()
    fold_instructions = []

    # The context manager closes the file even when a line fails to parse.
    with open('input.txt') as handle:
        # read dots: every line up to the first blank line
        for raw in iter(handle.readline, '\n'):
            fields = raw.replace('\n', '').split(',')
            dots.add((int(fields[0]), int(fields[1])))

        # read fold instructions: every remaining line up to end of file
        for raw in iter(handle.readline, ''):
            fields = raw.replace('\n', '').split('=')
            fold_instructions.append((fields[0][-1], int(fields[1])))

    print(fold_instructions)

    # PART I
    # fold with the first instruction
    paper = Paper(dots)
    paper.fold(fold_instructions.pop(0))

    # print the number of visible dots
    print('[f]: Dot count = %d' % (paper.count_dots()))

    # PART II
    # fold with the other instructions
    for fold_instruction in fold_instructions:
        paper.fold(fold_instruction)

    # print the resulting paper
    print(paper.to_string())

    return 0
def test_that_paper_can_be_initialized_with_text(self):
    """Text passed to the constructor should be what the page displays."""
    sheet = Paper('This is the original text of the paper')
    shown = sheet.display_page()
    self.assertEqual('This is the original text of the paper', shown)
def test_that_pencil_point_durability_does_not_change_for_spaces(self):
    """Spaces in the written text should not cost any point durability."""
    sharp_pencil = Pencil(point_durability=10)
    sheet = Paper()
    # The text is meant to end with 4 spaces.
    # NOTE(review): the literal below ends with a single space; the whitespace
    # run may have been collapsed in transit — confirm against the original.
    sharp_pencil.write(sheet, 'test ')
    self.assertEqual(6, sharp_pencil.point_durability)
def test_that_pencil_writes_on_paper(self):
    """Text written with a sharp pencil should appear on the page unchanged."""
    sheet = Paper()
    sharp_pencil = Pencil(point_durability=50)
    sharp_pencil.write(sheet, 'She sells sea shells')
    shown = sheet.display_page()
    self.assertEqual('She sells sea shells', shown)
def test_that_eraser_does_not_erase_when_it_is_given_an_empty_string(self):
    """Erasing the empty string should leave the page untouched."""
    sheet = Paper('How much wood...')
    self.pencil.erase(sheet, '')
    shown = sheet.display_page()
    self.assertEqual('How much wood...', shown)
def setUp(self):
    """Give each test a blank sheet and a pencil fitted with a fresh eraser."""
    self.paper = Paper()
    fresh_eraser = Eraser(20)
    self.pencil = Pencil(point_durability=50, eraser=fresh_eraser)
def setUp(self):
    """Give each test a default-constructed pencil and a blank sheet."""
    default_pencil = Pencil()
    blank_sheet = Paper()
    self.pencil = default_pencil
    self.paper = blank_sheet
def test_that_eraser_raises_value_error_if_text_to_erase_is_not_present(self):
    """Erasing text that is absent from the page (lower-case 'how') must raise ValueError."""
    sheet = Paper(
        'How much wood would a woodchuck chuck if a woodchuck could chuck wood?')
    self.assertRaises(ValueError, self.pencil.erase, sheet, 'how')
def test_that_initializing_blank_paper_is_blank_paper(self):
    """A paper constructed without text should display an empty page."""
    shown = Paper().display_page()
    self.assertEqual('', shown)
def __do_search_paper(self, paper_name):
    """Search by title and build a ``Paper`` from the first page of results.

    Posts the search form with ``paper_name`` as a title ('TI') query, then
    scans the ``div.search-results-item`` entries of the response for a title
    that equals ``paper_name`` once both are reduced to lower-case
    alphanumerics. The matching entry's detail page is fetched to read the
    journal, authors, year and IDS number. ``self.__require_number`` is
    incremented once per HTTP request.

    :param paper_name: title to search for.
    :return: ``(paper, error)`` where ``error`` is 'no error',
        'no such paper' (``paper`` is None) or 'more than one paper founded'
        (``paper`` is the first match).
        NOTE(review): when no collection is enabled the bare string
        'no collection' is returned instead of a tuple; kept as-is because
        callers may depend on it.
    """
    if not any(self.__collection_enable_list):
        print('enable at least one collection first!')
        error = 'no collection'
        return error

    # NOTE(review): this mutates the shared form dict stored on the instance.
    form_data = self.__form_data
    form_data['value(input1)'] = paper_name
    form_data['value(select1)'] = 'TI'

    s = requests.Session()
    r = s.post(self.__search_root, data=form_data, headers=self.__hearder)
    self.__require_number += 1
    soup = BeautifulSoup(r.text, 'html.parser')

    # Loop-invariant: the normalised form of the requested title.
    wanted_key = ''.join(filter(str.isalnum, paper_name)).lower()

    paper = None
    # Look for a paper with an equal title on the first results page only;
    # if the first page has no match, the paper counts as not found.
    for all_paper_info in soup.select('div.search-results-item'):
        # title
        title = all_paper_info.select(
            'a.smallV110 value')[0].get_text().replace(' ', '|||')
        title = title.strip()
        title = title.replace('|||', ' ')
        if ''.join(filter(str.isalnum, title)).lower() != wanted_key:
            continue

        if paper:
            # A second entry with the same title: report the ambiguity.
            error = 'more than one paper founded'
            return paper, error

        # Times Cited
        # r'\d+' captures the whole leading number; the previous r'\d'
        # kept only its first digit (e.g. 25 was read as 2).
        cited_times_str = findall(
            r'\d+',
            all_paper_info.select('div.search-results-data-cite')
            [0].get_text())[0]
        cited_times = int(cited_times_str)
        if cited_times > 0:
            cited_url = self.__root + all_paper_info.select(
                'div.search-results-data-cite a')[0]['href']
        else:
            cited_url = ''

        paper_url = self.__root + all_paper_info.select(
            'a.smallV110')[0]['href']
        r = s.get(paper_url)
        self.__require_number += 1
        paper_soup = BeautifulSoup(r.text, 'html.parser')

        # journal
        journal = paper_soup.select('p.sourceTitle value')[0].get_text()

        # authors, year & IDS
        authors = year = ids = None
        for possible_field in paper_soup.select('p.FR_field'):
            possible_str = possible_field.get_text()
            # The label alternatives are parenthesised so the "not yet found"
            # guard applies to both: ``and`` binds tighter than ``or``, so the
            # unparenthesised form let the second label bypass the guard.
            if not authors and (possible_str.find('By:') >= 0
                                or possible_str.find(u'作者:') >= 0):
                authors = findall('(?<=\\()(.+?)(?=\\))', possible_str)
            if not year and (possible_str.find('Published:') >= 0
                             or possible_str.find(u'出版年:') >= 0):
                year_str = findall(r'\d+', possible_str)[-1]
                year = int(year_str)
            if not ids and (possible_str.find('IDS Number:') >= 0
                            or possible_str.find(u'IDS 号:') >= 0):
                ids = findall(r'\w+', possible_str)[-1]

        paper = Paper(title, authors, journal, year, ids, cited_times,
                      cited_url)

    if paper:
        error = 'no error'
    else:
        error = 'no such paper'
    return paper, error
def __do_search_cite_papers(self, paper, collection):
    """Collect every paper listed on the citing-papers pages of ``paper``.

    Starts from ``paper.cited_url``. When ``collection`` is given, the count
    shown in the matching ``span#CAScorecard_count_WOS…`` element decides
    whether to stop early (count 0) or to follow that span's link instead.
    Every result entry on every page is turned into a ``Paper`` after
    fetching its detail page. ``self.__require_number`` is incremented once
    per HTTP request.

    :param paper: object whose ``cited_url`` attribute is the page to start from.
    :param collection: key into ``COLLECTION_CN``, or None to use the start
        page as-is.
    :return: ``(cite_papers, cite_url)`` — the list of citing papers and the
        URL the listing was actually read from.
    """
    cite_papers = []
    cite_url = paper.cited_url

    # Fetch the citation page.
    s = requests.Session()
    r = s.get(cite_url)
    self.__require_number += 1
    cite_soup = BeautifulSoup(r.text, 'html.parser')

    if collection is not None:
        span = cite_soup.select('span#CAScorecard_count_WOS' +
                                COLLECTION_CN[collection])[0]
        # Value comparison: ``is 0`` tested object identity, which only works
        # by accident of small-int caching and is a SyntaxWarning on 3.8+.
        if int(span.get_text()) == 0:
            return cite_papers, cite_url
        # Drop the session id from the link before requesting it.
        cite_url = self.__root + '/' + span.a['href'].replace(
            ';jsessionid=' + r.cookies['JSESSIONID'], '')
        r = s.get(cite_url)
        self.__require_number += 1
        cite_soup = BeautifulSoup(r.text, 'html.parser')

    # Gather the citing papers' details, paging until the last page.
    while True:
        papers_info = cite_soup.select('div.search-results-item')
        for paper_info in papers_info:
            title = paper_info.select(
                'a.smallV110 value')[0].get_text().replace(' ', '|||')
            title = title.strip()
            title = title.replace('|||', ' ')

            # Times Cited
            # r'\d+' captures the whole leading number; the previous r'\d'
            # kept only its first digit (e.g. 25 was read as 2).
            cited_times_str = findall(
                r'\d+',
                paper_info.select('div.search-results-data-cite')
                [0].get_text())[0]
            cited_times = int(cited_times_str)
            if cited_times > 0:
                cited_url = self.__root + paper_info.select(
                    'div.search-results-data-cite a')[0]['href']
            else:
                cited_url = ''

            paper_url = self.__root + paper_info.select(
                'a.smallV110')[0]['href']
            r = s.get(paper_url)
            self.__require_number += 1
            paper_soup = BeautifulSoup(r.text, 'html.parser')

            # journal
            journal = paper_soup.select(
                'p.sourceTitle value')[0].get_text()

            # authors, years & IDS
            authors = year = ids = None
            for possible_field in paper_soup.select('p.FR_field'):
                possible_str = possible_field.get_text()
                # The label alternatives are parenthesised so the "not yet
                # found" guard applies to both: ``and`` binds tighter than
                # ``or``, so the unparenthesised form let the second label
                # bypass the guard.
                if not authors and (possible_str.find('By:') >= 0
                                    or possible_str.find(u'作者:') >= 0):
                    authors = findall('(?<=\\()(.+?)(?=\\))', possible_str)
                if not year and (possible_str.find('Published:') >= 0
                                 or possible_str.find(u'出版年:') >= 0):
                    year_str = findall(r'\d+', possible_str)[-1]
                    year = int(year_str)
                if not ids and (possible_str.find('IDS Number:') >= 0
                                or possible_str.find(u'IDS 号:') >= 0):
                    ids = findall(r'\w+', possible_str)[-1]

            # A separate local keeps the ``paper`` argument from being
            # overwritten inside the loop.
            cite_paper = Paper(title, authors, journal, year, ids,
                               cited_times, cited_url)
            print(cite_paper)
            cite_papers.append(cite_paper)

        # Keep paging until the last page.
        total_page = int(
            cite_soup.select('span[id="pageCount.top"]')[0].get_text())
        current_page = int(
            cite_soup.select('input.goToPageNumber-input')[0]['value'])
        print('%d of % d' % (current_page, total_page))
        if current_page < total_page:
            r = s.get(cite_soup.select('a.paginationNext')[0]['href'])
            self.__require_number += 1
            cite_soup = BeautifulSoup(r.text, 'html.parser')
        else:
            break

    return cite_papers, cite_url
def __init__(self, lock, logger):
    """Store the lock and logger and build the Twitter and Paper collaborators.

    :param lock: stored as ``self.lock``.
    :param logger: stored as ``self.logger`` and passed to ``Twitter``,
        ``Translate`` and ``Paper``.
    """
    self.lock = lock
    self.logger = logger
    self.twitter = Twitter(logger)
    # The translator is only needed by Paper, so it is not kept on self.
    self.paper = Paper(logger, Translate(logger))
def get_info(pdf_file, txt_file):
    """Extract bibliographic details from the first page of a PDF.

    The first page of ``pdf_file`` is converted to text in a temporary file
    named after ``txt_file`` (its last three characters replaced by
    'firstpage.txt'). The ``find_*`` helpers then read that file to fill in
    the author, page range, DOI, title, year and publisher of a new ``Paper``.
    The temporary file is removed before returning.

    :param pdf_file: path of the PDF to read.
    :param txt_file: path used only to derive the temporary file's name.
    :return: the populated ``Paper``.
    """
    pages_to_extract = 1
    pagenos = set()
    laparams = LAParams()
    rsrcmgr = PDFResourceManager(caching=True)
    file_name = txt_file[:-3] + 'firstpage.txt'
    paper = Paper()

    # ``open`` replaces the Python-2-only ``file`` builtin, and the context
    # manager closes both handles even if extraction raises.
    with open(pdf_file, 'rb') as fp, open(file_name, 'w') as out_file:
        device = TextConverter(rsrcmgr, out_file, codec='utf-8',
                               laparams=laparams, imagewriter=None)
        try:
            interpreter = PDFPageInterpreter(rsrcmgr, device)
            paper.set_pages(
                PDFPage.get_num(fp, pagenos, maxpages=pages_to_extract,
                                password='', caching=True,
                                check_extractable=True))
            for page in PDFPage.get_pages(fp, pagenos,
                                          maxpages=pages_to_extract,
                                          password='', caching=True,
                                          check_extractable=True):
                page.rotate = (page.rotate) % 360
                interpreter.process_page(page)
        finally:
            # Close the converter while its output file is still open.
            device.close()

    try:
        paper.set_author(find_author(file_name))
        paper.set_page_range(find_range(file_name))
        paper.set_doi(find_doi(file_name))
        # The title lookup is given the line number reported for the author.
        line = author_line_num(file_name)
        paper.set_title(find_title(file_name, line))
        paper.set_year(find_year(file_name))
        paper.set_publisher(find_publisher(file_name))
    finally:
        # Remove the temporary first-page text even if a lookup fails.
        os.remove(file_name)

    return paper