Python DataCollection 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: dataCollection

클래스/타입: DataCollection

hotexamples.com에서의 예제들: 9

Python DataCollection - 예제 9개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python dataCollection.DataCollection의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

DataCollection(7)

empty_data_collection(3)

document_already_exist(2)

get_collection_size(2)

push_to_collection(2)

get_all_entries(1)

예제 #1

파일 보기

def clear_database(data_collection_type):
    """Empty one of the collections of the "goodReads" database.

    When the requested name is not a supported collection, an error message
    is printed and the function returns without touching the database.

    Args:
        data_collection_type (str): Collection to clear, either 'book' or 'author'
    """
    is_known_collection = data_collection_type in ('book', 'author')
    if not is_known_collection:
        message = ("Error: no collection named " + data_collection_type +
                   ", please enter 'book' or 'author' ")
        print(message)
        return

    target = DataCollection(MONGO_CONNECTION_STRING, "goodReads",
                            data_collection_type)
    target.empty_data_collection()

예제 #2

파일 보기

def export(data_collection_type, file_path):
    """Export all entries of a collection in the database into a json file

    When the collection name is not supported, an error message is printed
    and the function returns without creating or modifying any file.

    Args:
        data_collection_type (str): Name of data collection, either 'book' or 'author'
        file_path (str): Path of json file to export data into
    """
    if data_collection_type not in ('book', 'author'):
        print("Error: no collection named " + data_collection_type +
              ", please enter 'book' or 'author' ")
        return

    datacollection = DataCollection(MONGO_CONNECTION_STRING, 'goodReads',
                                    data_collection_type)
    # Materialise the result into a list before serialising it.
    entries = list(datacollection.get_all_entries())
    json_data = dumps(entries, indent=2)

    # Explicit encoding so the written file does not depend on the
    # platform's locale default.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(json_data)

예제 #3

파일 보기

def scrape(data_collection_type, start_url, target_number):
    """Scrape data from goodreads starting with the starting url

    The start url must begin with ``www.goodreads.com/book/show/`` (for
    books) or ``www.goodreads.com/author/show/`` (for authors), optionally
    preceded by ``http://`` or ``https://``. An unknown collection name only
    prints an error and returns.

    Args:
        data_collection_type (str):  Name of data collection, either 'book' or 'author'
        start_url (str): The url to start scraping from
        target_number (int): Number of books/authors to scrape

    Raises:
        SystemExit: With status 1 when start_url is not a valid goodreads
            book/author url, or when target_number exceeds the limit
            (200 books, 50 authors).
    """

    if data_collection_type == "book":
        # Anchored match with an optional scheme and escaped dots: the url
        # has to *start* at the goodreads host, not merely contain the path
        # somewhere (e.g. inside a query string of another site).
        if not re.match(r'(https?://)?www\.goodreads\.com/book/show/(.*)',
                        start_url):
            print("Please provide a valid url pointing to a book in goodReads")
            sys.exit(1)
        if target_number > 200:
            print("Cannot scrape more than 200 books at once")
            sys.exit(1)
        data_collection = DataCollection(MONGO_CONNECTION_STRING, "goodReads",
                                         "book")
        book_scraper = BookScraper(data_collection)
        book_scraper.scrapeBooks(start_url, target_number)
    elif data_collection_type == "author":
        # Same validation as for books, against the author path.
        if not re.match(r'(https?://)?www\.goodreads\.com/author/show/(.*)',
                        start_url):
            print(
                "Please provide a valid url pointing to an author in goodReads"
            )
            sys.exit(1)
        if target_number > 50:
            print("Cannot scrape more than 50 authors at once")
            sys.exit(1)
        data_collection = DataCollection(MONGO_CONNECTION_STRING, "goodReads",
                                         "author")
        author_scraper = AuthorScraper(data_collection)
        author_scraper.scrapeAuthors(start_url, target_number)
    else:
        print("Error: no collection named " + data_collection_type +
              ", please enter 'book' or 'author' ")
        return

예제 #4

파일 보기

def import_json(data_collection_type, file_path):
    """Import information in a json file to the database

    Every entry of the file that the collection does not already report as
    existing is pushed to it. When the collection name is not supported, an
    error message is printed and the function returns without reading the
    file.

    Args:
        data_collection_type (str): Name of data collection, either 'book' or 'author'
        file_path (str): Path of json file to extract info from
    """

    if data_collection_type not in ('book', 'author'):
        print("Error: no collection named " + data_collection_type +
              ", please enter 'book' or 'author' ")
        return
    datacollection = DataCollection(MONGO_CONNECTION_STRING, 'goodReads',
                                    data_collection_type)

    # JSON text is UTF-8; do not rely on the platform's locale default,
    # which would mis-decode non-ASCII content on some systems.
    with open(file_path, encoding='utf-8') as file:
        file_data = json.load(file)
    for entry in file_data:
        # Strip any "_id" carried in the file before the existence check and
        # insert (presumably an id left over from a previous export).
        entry.pop("_id", None)
        if not datacollection.document_already_exist(entry):
            datacollection.push_to_collection(entry)

예제 #5

파일 보기

파일: testScraper.py 프로젝트: Roxanne1225/goodReadsScraper

class TestScraper(unittest.TestCase):
    """Checks that scraping a single goodreads page stores exactly one document."""

    def setUp(self):
        """Create the test collection handle and one scraper of each kind on it."""
        connection_string = os.getenv('MONGO_CONNECTION_STRING')
        self.testDB = DataCollection(connection_string, "testDatabase",
                                     "testCollection")
        self.bookScraper = BookScraper(self.testDB)
        # The attribute name (including its spelling) is kept as-is because
        # it is instance state that other code may refer to.
        self.authroScraper = AuthorScraper(self.testDB)

    def testBookScraper(self):
        """After scraping one book into an emptied collection, its size is 1."""
        book_url = "https://www.goodreads.com/book/show/6185.Wuthering_Heights"
        self.testDB.empty_data_collection()
        self.bookScraper.scrape_one_book(book_url)
        stored_count = self.testDB.get_collection_size()
        self.assertEqual(1, stored_count)

    def testAuthorScraper(self):
        """After scraping one author into an emptied collection, its size is 1."""
        author_url = "https://www.goodreads.com/author/show/6485178.Fredrik_Backman"
        self.testDB.empty_data_collection()
        self.authroScraper.scrape_one_author(author_url)
        stored_count = self.testDB.get_collection_size()
        self.assertEqual(1, stored_count)

예제 #6

파일 보기

import pytest
import requests
import os
from dataCollection import DataCollection
from dotenv import load_dotenv
# Root URL of the API that the tests below send their requests to.
BAES_URL = "http://127.0.0.1:5000/api"

# Load variables from a .env file (python-dotenv) before reading the
# connection string from the environment; the order of these two lines matters.
load_dotenv()
MONGO_CONNECTION_STRING = os.getenv('MONGO_CONNECTION_STRING')


# Module-level handles on the 'book' and 'author' collections of the
# "goodReads" database; created once at import time and shared by the tests.
book_data_collection = DataCollection(MONGO_CONNECTION_STRING, "goodReads", 'book')
author_data_collection = DataCollection(MONGO_CONNECTION_STRING, "goodReads", 'author')

def test_connect_to_api():
    """The API root answers a GET request with status 200."""
    # A timeout keeps the test from hanging forever when the server is
    # unreachable or stalls; requests waits indefinitely by default.
    response = requests.get(BAES_URL, timeout=10)
    assert response.status_code == 200

def test_get_book():
    """A book pushed to the collection can be requested from /book by its id."""
    book = {"url": "testurl2", "id": "testid2"}
    book_data_collection.push_to_collection(book)
    # A timeout keeps the test from hanging forever when the server is
    # unreachable or stalls; requests waits indefinitely by default.
    response = requests.get(BAES_URL + '/book?id=testid2', timeout=10)
    assert response.status_code == 200

def test_get_author():
    """An author pushed to the collection can be requested from /author by its id."""
    author = {"url": "testurl2", "id": "testid2"}
    author_data_collection.push_to_collection(author)
    # A timeout keeps the test from hanging forever when the server is
    # unreachable or stalls; requests waits indefinitely by default.
    response = requests.get(BAES_URL + '/author?id=testid2', timeout=10)
    assert response.status_code == 200
    
def test_put_author():

예제 #7

파일 보기

파일: testDataCollection.py 프로젝트: Roxanne1225/goodReadsScraper

 def setUp(self):
     """Store the start url and connection string, then open the test collection."""
     mongo_uri = os.getenv('MONGO_CONNECTION_STRING')
     self.start_url = "https://www.goodreads.com/book/show/53175355-many-points-of-me"
     self.connection_string = mongo_uri
     self.testDB = DataCollection(mongo_uri, "testDatabase", "testCollection")

예제 #8

파일 보기

파일: testDataCollection.py 프로젝트: Roxanne1225/goodReadsScraper

class TestDataCollection(unittest.TestCase):
    """Exercises push, empty, size and existence checks of DataCollection."""

    def setUp(self):
        """Store the start url and connection string, then open the test collection."""
        mongo_uri = os.getenv('MONGO_CONNECTION_STRING')
        self.start_url = "https://www.goodreads.com/book/show/53175355-many-points-of-me"
        self.connection_string = mongo_uri
        self.testDB = DataCollection(mongo_uri, "testDatabase",
                                     "testCollection")

    def testPushToBookCollection(self):
        """A pushed document is afterwards reported as already existing."""
        self.testDB.empty_data_collection()
        document = {"url": 1, "test": 2}
        self.testDB.push_to_collection(document)
        found = self.testDB.document_already_exist(document)
        self.assertEqual(True, found)

    def testempty_data_collection(self):
        """Emptying the collection leaves it with size 0."""
        self.testDB.empty_data_collection()
        size_after_clear = self.testDB.get_collection_size()
        self.assertEqual(0, size_after_clear)

    def testget_collection_size(self):
        """Two pushes into an emptied collection give size 2."""
        self.testDB.empty_data_collection()
        first_doc = {"url": 3, "test": 2}
        second_doc = {"url": 1, "test": 1}
        for document in (first_doc, second_doc):
            self.testDB.push_to_collection(document)
        self.assertEqual(2, self.testDB.get_collection_size())

    def testdocument_already_exist(self):
        """Only the document that was pushed is reported as existing."""
        self.testDB.empty_data_collection()
        pushed_doc = {"url": 3, "test": 2}
        absent_doc = {"url": 1, "test": 1}
        self.testDB.push_to_collection(pushed_doc)
        pushed_found = self.testDB.document_already_exist(pushed_doc)
        self.assertEqual(True, pushed_found)
        absent_found = self.testDB.document_already_exist(absent_doc)
        self.assertEqual(False, absent_found)

예제 #9

파일 보기

파일: testScraper.py 프로젝트: Roxanne1225/goodReadsScraper

 def setUp(self):
     """Create the test collection handle and one scraper of each kind on it."""
     connection_string = os.getenv('MONGO_CONNECTION_STRING')
     test_collection = DataCollection(connection_string, "testDatabase",
                                      "testCollection")
     self.testDB = test_collection
     self.bookScraper = BookScraper(test_collection)
     # Attribute name (including its spelling) kept as-is: other methods of
     # the class, not visible here, may refer to it.
     self.authroScraper = AuthorScraper(test_collection)