Example #1
def test_openLibraryStatus_output_on_connection_error(monkeypatch, capsys):
    # Simulate a network failure: every call to requests.head raises.
    def mock_requests_head(_):
        raise ConnectionError

    monkeypatch.setattr(requests, "head", mock_requests_head)
    assert not pageStatus(TEST_URL)
    captured = capsys.readouterr()
    assert captured.out == CONNECTION_ERROR_MESSAGE.format(TEST_URL) + "\n"
Example #2
def test_openLibraryStatus_output_for_wrong_status_code(monkeypatch, capsys):
    # Stub out requests.head with a bare object carrying an
    # unexpected status code.
    def mock_requests_head(_):
        return type("_", (), {"status_code": 42})

    monkeypatch.setattr(requests, "head", mock_requests_head)
    assert not pageStatus(TEST_URL)
    captured = capsys.readouterr()
    assert captured.out == CONNECTION_ERROR_MESSAGE.format(TEST_URL) + "\n"
Example #3
def test_openLibraryStatus_output_if_it_can_connect(monkeypatch, capsys,
                                                    status_code):
    # status_code is injected by pytest (a fixture or parametrization),
    # so this test runs once for every accepted code.
    def mock_requests_head(_):
        return type("_", (), {"status_code": status_code})

    monkeypatch.setattr(requests, "head", mock_requests_head)
    assert pageStatus(TEST_URL)
    captured = capsys.readouterr()
    assert captured.out == f"Connected to: {TEST_URL}\n"
Example #4
def main(author, similarity):
    # Returns all the books written by an author from OpenLibrary,
    # using `similarity` as the threshold for filtering near-duplicate titles.
    status = pageStatus(OPEN_LIBRARY_URL)
    if status is not False:
        search_url = "http://openlibrary.org/search.json?author=" + author
        response = requests.get(search_url)
        data = json.loads(response.text)["docs"]
        if data:
            # Collect every title, then drop exact duplicates while
            # preserving order.
            books = [doc["title"] for doc in data]
            mylist = list(dict.fromkeys(books))

            # Filtering results: strip common noise words so that
            # near-identical titles compare as equal.
            words = [
                " the ",
                "The ",
                " THE ",
                " The",
                " a ",
                " A ",
                " and ",
                " of ",
                " from ",
                "on",
                "The",
                "in",
            ]

            noise_re = re.compile(
                "\\b(%s)\\W" % ("|".join(map(re.escape, words))), re.I)
            clean_mylist = [noise_re.sub("", p) for p in mylist]

            # Keep a title only if it is not similar to one already kept.
            unique_titles = []
            for title in clean_mylist:
                if not any(similar(title, kept, similarity)
                           for kept in unique_titles):
                    unique_titles.append(title)
            clean_mylist = unique_titles

            clean_mylist.sort()
            print(" ~Books found in the OpenLibrary database:\n")
            for title in clean_mylist:
                print(title)
            return clean_mylist
        else:
            print("(!) No valid author name, or bad internet connection.")
            print("Please try again!")
            return None
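
main() relies on a similar() helper that is not shown in these examples. A plausible sketch, assuming it compares two titles against the similarity threshold with difflib (the real project's implementation may differ):

from difflib import SequenceMatcher


def similar(a, b, threshold):
    # Hypothetical helper: treat two titles as duplicates when their
    # SequenceMatcher ratio reaches the threshold (0.0 to 1.0).
    return SequenceMatcher(None, a.lower(), b.lower()).ratio() >= threshold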
Example #5
def open_access_button(doi, title):
    # Queries the Open Access Button service by DOI when one is given,
    # otherwise by title, and returns the parsed JSON response.
    status = pageStatus(OPEN_ACCESS_BUTTON)
    if status:
        if doi is not None:
            query = {"doi": doi}
        else:
            query = {"title": title}
        req = requests.get(OPEN_ACCESS_BUTTON, params=query)
        response = req.json()
        return response
    else:
        print(CONNECTION_ERROR_MESSAGE.format("Open Access Button"))
        return None
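
A usage sketch (the DOI below is the illustrative example from the DOI handbook, not a real lookup):

# Hypothetical call: prefer a DOI lookup, falling back to a title search.
result = open_access_button("10.1000/xyz123", None)
if result is not None:
    print(result)  # parsed JSON returned by the service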
Example #6
def arxiv(term):
    # Searches arXiv.org and returns a DataFrame with the results found.
    status = pageStatus(ARCHIV_URL)
    if status:
        br = mechanize.Browser()
        br.set_handle_robots(False)  # ignore robots.txt
        br.set_handle_refresh(False)
        br.addheaders = [("User-agent", "Firefox")]

        # Submit the search form with the given term.
        br.open(ARCHIV_URL)
        br.select_form(nr=0)
        br.form["query"] = term
        html_from_page = br.submit()
        html_soup = soup(html_from_page, "html.parser")

        # Collect the result titles.
        t = html_soup.findAll("div", {"class": "list-title mathjax"})
        titles = []
        for i in t:
            raw = i.text.replace("Title: ", "").replace("\n", "")
            titles.append(raw)

        # Collect the author lists.
        authors = []
        auth_soup = html_soup.findAll("div", {"class": "list-authors"})
        for i in auth_soup:
            raw = i.text.replace("Authors:", "").replace("\n", "")
            authors.append(raw)

        # Collect the download links and their file extensions.
        extensions = []
        urls = []
        ext = html_soup.findAll("span", {"class": "list-identifier"})
        for i in ext:
            a = i.findAll("a")
            link = a[1]["href"]
            extensions.append(str(a[1].text))
            urls.append(ARCHIV_BASE + link)

        arxiv_df = pd.DataFrame({
            "Title": titles,
            "Author(s)": authors,
            "Url": urls,
            "Extension": extensions,
        })

        return arxiv_df
    else:
        print(CONNECTION_ERROR_MESSAGE.format("ArXiv"))
        return None
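
A usage sketch (the search term is arbitrary):

# Illustrative call: run a search and inspect the first few rows.
df = arxiv("gravitational waves")
if df is not None:
    print(df.head())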
Example #7
def main_organiser(directory):
    status = pageStatus(OPEN_LIBRARY_URL)
    if status is not False:
        book_list = get_books(directory)
        # List only the files in the given directory.
        namepath = []
        with os.scandir(directory) as entries:
            for entry in entries:
                if entry.is_file():
                    namepath.append(entry.name)
        for i in range(len(book_list)):
            print("File:", namepath[i])
            # Split the file name into author and book title to use as
            # search terms for OpenLibrary; try "by" first, then "-".
            for separator in ("by", "-"):
                try:
                    parts = book_list[i].split(separator)
                    author, book = parts[0], parts[1]
                    result = scraper(book, author)
                    print("\n***", book, "  ", author)
                    genre = result["genre"]
                    cutpaste(directory, genre, namepath[i])
                    break
                except IndexError:
                    continue
            else:
                print("Unable to organise this file.\n")
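
main_organiser() depends on get_books(), scraper(), and cutpaste() helpers that are not shown here. A minimal sketch of cutpaste(), assuming it files a book into a per-genre subfolder (the real helper may behave differently):

import os
import shutil


def cutpaste(directory, genre, filename):
    # Hypothetical sketch: move the file into a subfolder named after
    # the detected genre, creating the folder if needed.
    target_dir = os.path.join(directory, genre)
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(os.path.join(directory, filename),
                os.path.join(target_dir, filename))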
Example #8
def test_archiv_Status():
    status = pageStatus(url="http://export.arxiv.org/")
    assert status is not False, "arXiv status != 200"
Example #9
def test_open_libraryStatus():
    status = pageStatus(url="http://www.openlibrary.org")
    assert status is not False, "OpenLibrary status != 200"
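
These two live-connectivity tests are nearly identical; since the suite already uses pytest, they could be collapsed with parametrization (a sketch, not part of the original suite):

import pytest


@pytest.mark.parametrize("url", [
    "http://export.arxiv.org/",
    "http://www.openlibrary.org",
])
def test_page_status_live(url):
    # Hits the real services, so network access is required.
    assert pageStatus(url=url) is not False, f"{url} status != 200"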