Ejemplo n.º 1
0
class ValidFakeDatabase:
  """Fake database pre-populated with fixed sample objects.

  The constructor builds three ``Paper`` objects, five ``Author`` objects,
  four ``Tag`` objects, three ``Publisher`` objects and one ``User`` and
  stores each of them as an attribute of the instance.
  """

  # initializes dummy data to return
  def __init__(self):
    # Two fixed dates shared by the sample papers and authors below.
    self.dateA = datetime.datetime(2013, 8, 4, 12, 30, 45)
    self.dateB = datetime.datetime(1999, 7, 6, 12, 30, 45)

    # Sample papers.
    self.paperA = Paper("12345", "The Health Benefits of the All-Bacon Diet", ["4445", "666", "123"], ["Genetics", "Bioinformatics", "Search Engines", "Artificial Intelligence"], "Bacon is actually one of the healthiest foods of all time.  This is an abstract!  For the full article, download the PDF.", "1234", self.dateA, datetime.datetime.now(), "1111", ["ref1", "ref2", "ref3"], "14000", ["citation link 1", "citation link 2", "citation link 2"], "Your Favorite Publisher",["Alan Turing", "Shia Leboeuf", "Andrew Davidson"])
    self.paperB = Paper("90210", "The Dangers of Coding While Hungry", ["12068", "7797", "4326"], ["Genetics", "Bioinformatics", "Search Engines", "Artificial Intelligence"], " Abstracts never seem to be simple or contain useful information.", "444", self.dateA, datetime.datetime.now(), "6677", ["ref1", "ref2", "ref3"], "14000", ["citation link 1", "citation link 2", "citation link 2"], "Your Favorite Publisher",["Andrew Davidson","William Shakespeare","Edsger Dijkstra"])
    self.paperC = Paper("666", "The Struggles of Eating a Giordano's Pizza Alone", ["567", "2213", "989"], ["6237", "3177", "432"], "Abstracts are the SparkNotes of the academic world.", "12534434", self.dateB, datetime.datetime.now(), "2345", ["ref1", "ref2", "ref3"], "14000", ["citation link 1", "citation link 2", "citation link 2"], "Prentice Hall", ["Andrew Davidson","William Shakespeare","Edsger Dijkstra"])

    # Sample authors.
    self.authorA = Author("55555", "Shia Leboeuf", "4444", ["0", "1"],["The Health Benefits of the All-Bacon Diet", "The Dangers of Coding While Hungry"],[["Andrew Davidson","William Shakespeare","Edsger Dijkstra"],["Alan Turing", "Shia Leboeuf", "Andrew Davidson"]],[self.dateB,self.dateA])
    self.authorB = Author("43216", "Andrew Davidson", "1", ["0", "1"],["The Health Benefits of the All-Bacon Diet", "The Dangers of Coding While Hungry"],[["Andrew Davidson","William Shakespeare","Edsger Dijkstra"],["Alan Turing", "Shia Leboeuf", "Andrew Davidson"]],[self.dateB,self.dateA])
    self.authorC = Author("6542", "William Shakespeare", "11542", ["2", "1"],["The Struggles of Eating a Giordano's Pizza Alone","The Dangers of Coding While Hungry"],[["Andrew Davidson","William Shakespeare","Edsger Dijkstra"],["Alan Turing", "Shia Leboeuf", "Andrew Davidson"]],[self.dateB,self.dateA])
    self.authorD = Author("64632", "Edsger Dijkstra", "147", ["2", "1"],["The Struggles of Eating a Giordano's Pizza Alone","The Dangers of Coding While Hungry"],[["Andrew Davidson","William Shakespeare","Edsger Dijkstra"],["Alan Turing", "Shia Leboeuf", "Andrew Davidson"]],[self.dateB,self.dateA])
    self.authorE = Author("63421", "Alan Turing", "40000", ["2", "1"],["The Struggles of Eating a Giordano's Pizza Alone","The Dangers of Coding While Hungry"],[["Andrew Davidson","William Shakespeare","Edsger Dijkstra"],["Alan Turing", "Shia Leboeuf", "Andrew Davidson"]],[self.dateB,self.dateA])

    # Sample tags.
    self.tagA = Tag("Genetics", "40000", ["0", "1"])
    self.tagB = Tag("Bioinformatics", "12345", ["0", "1"])
    self.tagC = Tag("Search Engines", "5555", ["2", "3"])
    self.tagD = Tag("Artificial Intelligence", "42", ["2", "3"])

    # Sample publishers.
    self.publisherA = Publisher("1233", "Your Favorite Publisher",0)
    self.publisherB = Publisher("3468", "Prentice Hall",0)
    self.publisherC = Publisher("8372", "Rose-Hulman",0)

    # Sample user referencing the papers, authors and tags created above.
    self.userA = User("0","Otis Redding", ["1", "3"],["Andrew Davidson","Jonathan Jenkins"], [self.paperA, self.paperB, self.paperC], [self.authorA, self.authorB, self.authorC], [self.tagA, self.tagC, self.tagB, self.tagD], "45", "005792830123")
Ejemplo n.º 2
0
 def test_that_pencil_that_starts_dull_only_writes_empty_spaces(self):
     """A pencil created with zero point durability writes only blanks."""
     dull_pencil = Pencil(point_durability=0)
     sheet = Paper()
     dull_pencil.write(sheet, 'test')
     # One blank is expected for each of the four characters requested.
     expected_page = ' ' * 4
     self.assertEqual(expected_page, sheet.display_page())
 def getPaper(self, paperID):
     """Build a ``Paper`` from the ``Paper:<paperID>:*`` keys in Redis.

     :param paperID: string identifier used to build the Redis key names
     :return: a ``Paper`` instance, or ``None`` when no title is stored
              for ``paperID``
     """
     prefix = "Paper:" + paperID + ":"

     title = self.redisDB.get(prefix + "Title")
     if title is None:
         # No title key means the paper is unknown.
         return None

     authorIDs = list(self.redisDB.smembers(prefix + "Authors"))
     tags = list(self.redisDB.smembers(prefix + "Tags"))
     abstract = self.redisDB.get(prefix + "Abstract")
     publisherID = self.redisDB.get(prefix + "PublisherID")
     viewCount = self.redisDB.get(prefix + "ViewCount")
     datePosted = datetime.strptime(self.redisDB.get(prefix + "DatePosted"), "%Y-%m-%d %H:%M:%S.%f")

     # TODO: fix this later
     # The publication date is parsed by slicing a "YYYY-MM-DD..." string.
     datePublished = self.redisDB.get(prefix + "DatePublished")
     # A single pre-formatted string prints the same text on Python 2 and 3.
     print("datePublished for paper %s %s : %s" % (paperID, title, datePublished))
     datePublished = date(int(datePublished[0:4]), int(datePublished[5:7]), int(datePublished[8:10]))

     postedByUserID = self.redisDB.get(prefix + "PostedByUserID")
     references = list(self.redisDB.smembers(prefix + "References"))
     citedBys = list(self.redisDB.smembers(prefix + "CitedBys"))
     # The flag is stored as text; only the exact string 'True' counts as set.
     isUploaded = self.redisDB.get(prefix + "IsUploaded") == 'True'

     authorNames = [self.redisDB.get("Author:" + authorID + ":Name") for authorID in authorIDs]

     publisherGuy = self.getPublisher(publisherID)
     if publisherGuy is None:
         publisherName = "No Publisher Name"
     else:
         publisherName = publisherGuy.name

     return Paper(paperID, title, authorIDs, tags, abstract, publisherID, datePublished, datePosted, postedByUserID, references, viewCount, citedBys, publisherName, authorNames, isUploaded)
Ejemplo n.º 4
0
 def setUp(self):
     """Create the eraser, pencil and paper fixtures stored on the test case."""
     self.eraser = Eraser(durability=10)
     # The pencil is built around the eraser created just above.
     self.pencil = Pencil(
         point_durability=100,
         initial_length=5,
         eraser=self.eraser,
     )
     self.paper = Paper()
Ejemplo n.º 5
0
 def test_that_pencil_not_dull_when_upper_case_text_should_not_use_up_point_durability(
         self):
     """Writing 'TEST' with point durability 10 must leave durability 2."""
     writer = Pencil(point_durability=10)
     sheet = Paper()
     writer.write(sheet, 'TEST')
     remaining = writer.point_durability
     self.assertEqual(2, remaining)
Ejemplo n.º 6
0
 def read_csv_file(self, csv_name):
     """Create one ``Paper`` per line of ``csv_name`` and register it.

     For every line, the text after the last '/' and before the first
     newline is passed to ``Paper``. The resulting object is stored in
     ``self.papers_dict`` under its ``pmid`` and that ``pmid`` is appended
     to ``self.csv_papers``.

     :param csv_name: path of the file to read
     """
     with open(csv_name) as handle:
         for line in handle:
             tail = line.rsplit('/', 1)[-1]
             identifier = tail.partition('\n')[0]
             entry = Paper(identifier)
             self.papers_dict[entry.pmid] = entry
             self.csv_papers.append(entry.pmid)
Ejemplo n.º 7
0
 def test_that_pencil_not_dull_when_lower_case_text_len_is_less_than_point_durability(
         self):
     """Writing 'test' with point durability 10 must leave durability 6."""
     writer = Pencil(point_durability=10)
     sheet = Paper()
     writer.write(sheet, 'test')
     remaining = writer.point_durability
     self.assertEqual(6, remaining)
Ejemplo n.º 8
0
 def test_that_pencil_point_durability_decrements_by_one_for_special_characters(
         self):
     """Writing 29 special characters with durability 30 must leave 1."""
     special_chars = '~!@#$%^&*()_+`-=:",./<>?{}[]|'  # 29 characters
     writer = Pencil(point_durability=30)
     sheet = Paper()
     writer.write(sheet, special_chars)
     self.assertEqual(1, writer.point_durability)
Ejemplo n.º 9
0
 def test_that_pencil_point_durability_does_not_change_for_newline_characters(
         self):
     """Durability 10 must drop to 2 after text mixing letters and newlines."""
     # The text holds 8 lower-case letters and 5 newline characters.
     text_with_newlines = '\ntest\ntest\n\n\n'
     writer = Pencil(point_durability=10)
     sheet = Paper()
     writer.write(sheet, text_with_newlines)
     self.assertEqual(2, writer.point_durability)
Ejemplo n.º 10
0
 def test_that_pencil_writes_where_it_left_off(self):
     """Two consecutive writes must appear concatenated on the page."""
     sheet = Paper()
     writer = Pencil(point_durability=50)
     for fragment in ('She sells sea shells', ' down by the sea shore'):
         writer.write(sheet, fragment)
     expected_page = 'She sells sea shells down by the sea shore'
     self.assertEqual(expected_page, sheet.display_page())
Ejemplo n.º 11
0
 def test_that_pencil_that_becomes_dull_during_writing_writes_empty_spaces(
         self):
     """Writing 'Test' with durability 4 must yield 'Tes' plus one blank."""
     writer = Pencil(point_durability=4)
     sheet = Paper()
     writer.write(sheet, 'Test')
     # The final character is expected to come out as a single space.
     expected_page = 'Tes '
     self.assertEqual(expected_page, sheet.display_page())
Ejemplo n.º 12
0
 def test_that_eraser_erases_word_in_opposite_order(self):
     """An eraser with durability 3 must blank only the last three letters of 'Bill'."""
     rubber = Eraser(durability=3)
     writer = Pencil(point_durability=100, initial_length=5, eraser=rubber)
     sheet = Paper()
     writer.write(sheet, 'Buffalo Bill')
     writer.erase(sheet, 'Bill')
     expected_page = 'Buffalo B   '
     self.assertEqual(expected_page, sheet.display_page())
Ejemplo n.º 13
0
 def test_that_pencil_that_becomes_dull_returns_to_start_point_durability_when_sharpened(
         self):
     """Sharpening a pencil worn down to 0 must restore durability to 15."""
     starting_durability = 15
     writer = Pencil(point_durability=starting_durability, initial_length=5)
     sheet = Paper()
     writer.write(sheet, 'Test sharpening')
     # The write above is expected to use up the whole point.
     self.assertEqual(0, writer.point_durability)
     writer.sharpen()
     self.assertEqual(15, writer.point_durability)
Ejemplo n.º 14
0
 def test_that_pencil_that_is_sharpened_before_going_dull_continues_to_write(
         self):
     """Sharpening between two writes must let both texts be written in full."""
     writer = Pencil(point_durability=20, initial_length=5)
     sheet = Paper()
     writer.write(sheet, 'Testing sharpening.')
     writer.sharpen()
     writer.write(sheet, ' Testing sharpening.')
     expected_page = 'Testing sharpening. Testing sharpening.'
     self.assertEqual(expected_page, sheet.display_page())
Ejemplo n.º 15
0
 def test_that_pencil_that_becomes_dull_picks_up_after_spaces_when_sharpened(
         self):
     """After sharpening, writing must resume after the blanks left by the dull point."""
     writer = Pencil(point_durability=5, initial_length=5)
     sheet = Paper()
     text = 'Test sharpening'
     writer.write(sheet, text)
     # The first write is expected to exhaust the point.
     self.assertEqual(0, writer.point_durability)
     writer.sharpen()
     writer.write(sheet, text)
     expected_page = 'Test           Test           '
     self.assertEqual(expected_page, sheet.display_page())
Ejemplo n.º 16
0
    def create_paper(self, field):
        '''
        Creates a paper in the given field.

        The paper is registered on this agent via ``add_paper`` with the
        flag ``True`` and on every chosen co-author with the flag ``False``.
        Each co-author is also appended to ``self.day_actions['agent']``.

        :param field: string field of new paper
        :return: tuple with the paper object and other authors
        '''
        paper = Paper(field, self.id)
        self.add_paper(paper.id, True, field, paper)

        other_authors = self.choose_authors(self.number_of_coauthors(), field)

        for author in other_authors:
            self.simulation.dictionary[author].add_paper(
                paper.id, False, field, paper)
            # Create the 'agent' list on first use, then record the co-author.
            self.day_actions.setdefault('agent', []).append(author)

        # The documented return value was previously never returned.
        return paper, other_authors
Ejemplo n.º 17
0
    def recursion_search_citations(self, paper_pmid, k):
        """
        Recursively register the papers that cite the original paper.

        For every pmid in the original paper's ``pm_cited``, a ``Paper`` is
        created and stored in ``self.papers_dict`` if it is not there yet,
        ``add_to_pm_cite(paper_pmid)`` is called on it, and the search
        continues from that pmid with ``k - 1``.

        :param paper_pmid: the original paper pmid
        :param k: the number of recursion iterations
        :return: None (append all papers to self.papers_dict)
        """
        if k == 0:
            return

        # .get() lets an unknown pmid reach the None guard below instead of
        # raising KeyError before the guard is evaluated.
        original_paper = self.papers_dict.get(paper_pmid)
        if original_paper is None or original_paper.pm_cited is None:
            return

        for new_paper_pmid in original_paper.pm_cited:
            if new_paper_pmid not in self.papers_dict:
                new_paper = Paper(new_paper_pmid)
                new_paper.add_to_pm_cite(paper_pmid)
                self.papers_dict[new_paper.pmid] = new_paper
            else:
                self.papers_dict[new_paper_pmid].add_to_pm_cite(paper_pmid)
            self.recursion_search_citations(new_paper_pmid, k - 1)
Ejemplo n.º 18
0
def f():
    """Read dots and fold instructions from 'input.txt', fold, and print.

    The file has one ``x,y`` pair per line, then a blank line, then one
    fold instruction per line ending in ``<axis>=<value>``. The parsed
    instructions are printed, the dot count after the first fold is
    printed, the remaining folds are applied and the paper is printed.

    :return: 0
    """
    dots = set()
    fold_instructions = []

    # The context manager closes the file even if a line fails to parse.
    with open('input.txt') as handle:
        lines = iter(handle)

        # read dots (up to the blank separator line)
        for line in lines:
            if not line.strip():
                break
            coords = line.replace('\n', '').split(',')
            dots.add((int(coords[0]), int(coords[1])))

        # read fold instructions (rest of the file)
        for line in lines:
            if not line.strip():
                # Skip stray blank lines instead of failing on them.
                continue
            parts = line.replace('\n', '').split('=')
            # The axis is the last character before '='.
            fold_instructions.append((parts[0][-1], int(parts[1])))

    print(fold_instructions)

    # PART I
    # fold with the first instruction
    paper = Paper(dots)
    paper.fold(fold_instructions.pop(0))
    # print the number of visible dot
    print('[f]: Dot count = %d' % (paper.count_dots()))

    # PART II
    # fold with the other instructions
    for fold_instruction in fold_instructions:
        paper.fold(fold_instruction)
    # print the result paper
    print(paper.to_string())

    return 0
Ejemplo n.º 19
0
 def test_that_paper_can_be_initialized_with_text(self):
     """A paper built from a string must display that same string."""
     initial_text = 'This is the original text of the paper'
     sheet = Paper(initial_text)
     self.assertEqual('This is the original text of the paper',
                      sheet.display_page())
Ejemplo n.º 20
0
 def test_that_pencil_point_durability_does_not_change_for_spaces(self):
     """Writing 'test' plus four spaces with durability 10 must leave 6."""
     writer = Pencil(point_durability=10)
     sheet = Paper()
     text_with_trailing_spaces = 'test    '  # four letters, four spaces
     writer.write(sheet, text_with_trailing_spaces)
     self.assertEqual(6, writer.point_durability)
Ejemplo n.º 21
0
 def test_that_pencil_writes_on_paper(self):
     """Text written with a sharp pencil must appear unchanged on the page."""
     sheet = Paper()
     writer = Pencil(point_durability=50)
     sentence = 'She sells sea shells'
     writer.write(sheet, sentence)
     self.assertEqual('She sells sea shells', sheet.display_page())
Ejemplo n.º 22
0
 def test_that_eraser_does_not_erase_when_it_is_given_an_empty_string(self):
     """Erasing the empty string must leave the page text untouched."""
     sheet = Paper('How much wood...')
     self.pencil.erase(sheet, '')
     page_after = sheet.display_page()
     self.assertEqual('How much wood...', page_after)
Ejemplo n.º 23
0
 def setUp(self):
     """Create a blank paper and a pencil fitted with an eraser."""
     self.paper = Paper()
     # The eraser durability is passed positionally, as in Eraser(20).
     rubber = Eraser(20)
     self.pencil = Pencil(point_durability=50, eraser=rubber)
Ejemplo n.º 24
0
 def setUp(self):
     """Create a default pencil and a blank paper for each test."""
     self.pencil, self.paper = Pencil(), Paper()
Ejemplo n.º 25
0
 def test_that_eraser_raises_value_error_if_text_to_erase_is_not_present(self):
     """Erasing 'how' from text that only contains 'How' must raise ValueError."""
     sheet = Paper(
         'How much wood would a woodchuck chuck if a woodchuck could chuck wood?'
     )
     # assertRaises is used in its callable form instead of as a context manager.
     self.assertRaises(ValueError, self.pencil.erase, sheet, 'how')
Ejemplo n.º 26
0
 def test_that_initializing_blank_paper_is_blank_paper(self):
     """A paper created without arguments must display an empty string."""
     blank_sheet = Paper()
     displayed = blank_sheet.display_page()
     self.assertEqual('', displayed)
Ejemplo n.º 27
0
    def __do_search_paper(self, paper_name):
        """Search for a paper by title and build a ``Paper`` from the result.

        Posts a title ('TI') search to ``self.__search_root``, scans the
        first page of results for an entry whose title matches
        ``paper_name`` (comparing alphanumeric characters only, case
        insensitively) and fetches that entry's detail page.

        :param paper_name: title to search for
        :return: ``(paper, error)`` where ``error`` is one of
                 'no collection', 'more than one paper founded',
                 'no error' or 'no such paper'; ``paper`` is ``None``
                 when nothing was found
        """
        if not any(self.__collection_enable_list):
            print('enable at least one collection first!')
            # Return a (paper, error) pair like every other exit path.
            return None, 'no collection'

        form_data = self.__form_data
        form_data['value(input1)'] = paper_name
        form_data['value(select1)'] = 'TI'
        s = requests.Session()
        r = s.post(self.__search_root, data=form_data, headers=self.__hearder)
        self.__require_number += 1
        soup = BeautifulSoup(r.text, 'html.parser')

        # Hoisted out of the loop: the requested title never changes.
        wanted_key = ''.join(filter(str.isalnum, paper_name)).lower()

        paper = None
        # Look on the first result page for a paper with an equal title;
        # if the first page has no match the paper counts as not found.
        for all_paper_info in soup.select('div.search-results-item'):
            # title: protect inner spaces so strip() only trims other
            # surrounding whitespace, then restore them
            title = all_paper_info.select(
                'a.smallV110 value')[0].get_text().replace(' ', '|||')
            title = title.strip()
            title = title.replace('|||', ' ')
            if ''.join(filter(str.isalnum, title)).lower() != wanted_key:
                continue
            if paper:
                # A second match makes the result ambiguous.
                return paper, 'more than one paper founded'

            # Times Cited: read the whole number, not only its first digit
            cited_text = all_paper_info.select(
                'div.search-results-data-cite')[0].get_text()
            cited_times = int(findall(r'\d+', cited_text.replace(',', ''))[0])
            if cited_times > 0:
                cited_url = self.__root + all_paper_info.select(
                    'div.search-results-data-cite a')[0]['href']
            else:
                cited_url = ''

            paper_url = self.__root + all_paper_info.select(
                'a.smallV110')[0]['href']
            r = s.get(paper_url)
            self.__require_number += 1
            paper_soup = BeautifulSoup(r.text, 'html.parser')
            # journal
            journal = paper_soup.select('p.sourceTitle value')[0].get_text()
            authors = year = ids = None
            for possible_field in paper_soup.select('p.FR_field'):
                possible_str = possible_field.get_text()
                # Parentheses make "not yet found" guard both label
                # variants; without them `and` bound tighter than `or`.
                if not authors and ('By:' in possible_str
                                    or u'作者:' in possible_str):
                    authors = findall('(?<=\\()(.+?)(?=\\))', possible_str)
                if not year and ('Published:' in possible_str
                                 or u'出版年:' in possible_str):
                    year = int(findall(r'\d+', possible_str)[-1])
                if not ids and ('IDS Number:' in possible_str
                                or u'IDS 号:' in possible_str):
                    ids = findall(r'\w+', possible_str)[-1]
            paper = Paper(title, authors, journal, year, ids, cited_times,
                          cited_url)

        if paper:
            error = 'no error'
        else:
            error = 'no such paper'
        return paper, error
Ejemplo n.º 28
0
    def __do_search_cite_papers(self, paper, collection):
        """Collect the papers listed on ``paper``'s citing-articles pages.

        Fetches ``paper.cited_url``. When ``collection`` is given, it
        follows the link in that collection's count element instead. It
        then walks every result page and builds a ``Paper`` for each
        entry from the entry's detail page.

        :param paper: object exposing a ``cited_url`` attribute
        :param collection: key into ``COLLECTION_CN`` or ``None`` for all
        :return: ``(cite_papers, cite_url)`` - the list of ``Paper``
                 objects and the URL that was actually listed
        """
        cite_papers = []
        cite_url = paper.cited_url
        if not cite_url:
            # No citation URL is available, so there is nothing to fetch.
            return cite_papers, cite_url

        # Fetch the citing-articles page.
        s = requests.Session()
        r = s.get(cite_url)
        self.__require_number += 1
        cite_soup = BeautifulSoup(r.text, 'html.parser')
        if collection is not None:
            span = cite_soup.select('span#CAScorecard_count_WOS' +
                                    COLLECTION_CN[collection])[0]
            # Value comparison; `is 0` tested object identity.
            if int(span.get_text()) == 0:
                return cite_papers, cite_url
            cite_url = self.__root + '/' + span.a['href'].replace(
                ';jsessionid=' + r.cookies['JSESSIONID'], '')
            r = s.get(cite_url)
            self.__require_number += 1
            cite_soup = BeautifulSoup(r.text, 'html.parser')

        # Gather the citing papers' information.
        while True:  # keep paging until the last page
            for paper_info in cite_soup.select('div.search-results-item'):
                # title: protect inner spaces so strip() only trims other
                # surrounding whitespace, then restore them
                title = paper_info.select(
                    'a.smallV110 value')[0].get_text().replace(' ', '|||')
                title = title.strip()
                title = title.replace('|||', ' ')

                # Times Cited: read the whole number, not only its first digit
                cited_text = paper_info.select(
                    'div.search-results-data-cite')[0].get_text()
                cited_times = int(
                    findall(r'\d+', cited_text.replace(',', ''))[0])
                if cited_times > 0:
                    cited_url = self.__root + paper_info.select(
                        'div.search-results-data-cite a')[0]['href']
                else:
                    cited_url = ''

                paper_url = self.__root + paper_info.select(
                    'a.smallV110')[0]['href']
                r = s.get(paper_url)
                self.__require_number += 1
                paper_soup = BeautifulSoup(r.text, 'html.parser')
                # journal
                journal = paper_soup.select(
                    'p.sourceTitle value')[0].get_text()
                # authors, years & IDS
                authors = year = ids = None
                for possible_field in paper_soup.select('p.FR_field'):
                    possible_str = possible_field.get_text()
                    # Parentheses make "not yet found" guard both label
                    # variants; without them `and` bound tighter than `or`.
                    if not authors and ('By:' in possible_str
                                        or u'作者:' in possible_str):
                        authors = findall('(?<=\\()(.+?)(?=\\))', possible_str)
                    if not year and ('Published:' in possible_str
                                     or u'出版年:' in possible_str):
                        year = int(findall(r'\d+', possible_str)[-1])
                    if not ids and ('IDS Number:' in possible_str
                                    or u'IDS 号:' in possible_str):
                        ids = findall(r'\w+', possible_str)[-1]
                # Separate name so the `paper` argument is not shadowed.
                cite_paper = Paper(title, authors, journal, year, ids,
                                   cited_times, cited_url)
                print(cite_paper)
                cite_papers.append(cite_paper)

            # Move to the next page until the last one has been processed.
            total_page = int(
                cite_soup.select('span[id="pageCount.top"]')[0].get_text())
            current_page = int(
                cite_soup.select('input.goToPageNumber-input')[0]['value'])
            print('%d of % d' % (current_page, total_page))
            if current_page >= total_page:
                break
            r = s.get(cite_soup.select('a.paginationNext')[0]['href'])
            self.__require_number += 1
            cite_soup = BeautifulSoup(r.text, 'html.parser')
        return cite_papers, cite_url
Ejemplo n.º 29
0
 def __init__(self, lock, logger):
     """Store the lock and logger and build the Twitter and Paper helpers.

     ``logger`` is handed to ``Twitter``, ``Translate`` and ``Paper``; the
     ``Translate`` instance is passed on to ``Paper``.
     """
     self.twitter = Twitter(logger)
     self.paper = Paper(logger, Translate(logger))
     self.logger = logger
     self.lock = lock
Ejemplo n.º 30
0
def get_info(pdf_file, txt_file):
    """Extract metadata from the first page of a PDF into a ``Paper``.

    The first page of ``pdf_file`` is converted to text in a temporary
    file named after ``txt_file`` (its last three characters replaced by
    'firstpage.txt'). The ``find_*`` helpers read author, page range,
    DOI, title, year and publisher from that file, which is then removed.

    :param pdf_file: path of the PDF to read
    :param txt_file: path whose stem is used for the temporary text file
    :return: the populated ``Paper`` object
    """
    pages_to_extract = 1
    pagenos = set()
    file_name = txt_file[:-3] + 'firstpage.txt'

    paper = Paper()
    try:
        # open() replaces the Python 2-only file() builtin, and the
        # context managers close both handles even if extraction fails.
        with open(pdf_file, 'rb') as fp, open(file_name, 'w') as out_file:
            rsrcmgr = PDFResourceManager(caching=True)
            device = TextConverter(rsrcmgr,
                                   out_file,
                                   codec='utf-8',
                                   laparams=LAParams(),
                                   imagewriter=None)
            try:
                interpreter = PDFPageInterpreter(rsrcmgr, device)

                paper.set_pages(
                    PDFPage.get_num(fp,
                                    pagenos,
                                    maxpages=pages_to_extract,
                                    password='',
                                    caching=True,
                                    check_extractable=True))

                for page in PDFPage.get_pages(fp,
                                              pagenos,
                                              maxpages=pages_to_extract,
                                              password='',
                                              caching=True,
                                              check_extractable=True):
                    # Normalize the rotation into the range [0, 360).
                    page.rotate = page.rotate % 360
                    interpreter.process_page(page)
            finally:
                device.close()

        # The helpers below read the text file written above.
        paper.set_author(find_author(file_name))
        paper.set_page_range(find_range(file_name))
        paper.set_doi(find_doi(file_name))
        line = author_line_num(file_name)
        paper.set_title(find_title(file_name, line))
        paper.set_year(find_year(file_name))
        paper.set_publisher(find_publisher(file_name))
    finally:
        # Remove the temporary text file on failure as well as on success.
        if os.path.exists(file_name):
            os.remove(file_name)

    # Disabled in the original source:
    #   info_filename = txt_file[:-3]+'info.txt'
    #   paper.generate_citations(info_filename)

    return paper