def scrape(id):
    """Scrape up to 10 https hyperlinks from the page with the given id.

    Fetches the page record, marks it as scraping, downloads the page
    HTML, extracts at most 10 links whose href starts with "https",
    replaces any previously saved links for the page, then clears the
    scraping flag.

    Returns a success banner string, or None if any step failed (the
    exception is printed, preserving the original best-effort contract).
    """
    try:
        page_rows = DB.pages().fetch(id)
        if not page_rows:
            # Meaningful error instead of the original bare `raise Exception`;
            # still lands in the except-block below, so callers see no change.
            raise ValueError('page with id %r not found' % (id,))
        address = page_rows[0][0]
        DB().pages().update(id, 'True')  # mark is_scraping = true
        response = requests.get(address)
        soup = BeautifulSoup(response.text, features='html.parser')
        # Keep only absolute https links, capped at 10.
        links = [a['href'] for a in soup.find_all('a', href=True)
                 if a['href'].startswith('https')]
        links = links[:10]
        DB().links().delete(id)  # drop links saved by a previous run
        for url in links:
            DB().links().insert(url, id)
        DB().pages().update(id, 'False')  # mark is_scraping = false
        return '===============Successfully scraped================'
    except Exception as e:
        # Preserve original behaviour: log and swallow, returning None.
        print(e)
def spider_scrap(page_id):
    """Scrape the page identified by page_id and store up to 10 https links.

    Raises:
        ValueError: if page_id is not present in the pages table.
    """
    # Validate the id against the known pages (set lookup instead of the
    # original O(n) list scan).
    valid_ids = {row[0] for row in DB().pages().select()}
    if page_id not in valid_ids:
        raise ValueError('page_id not valid')
    url = DB().pages().fetch_url(page_id)
    # Flag the page as being scraped while we work on it.
    DB().pages().update_id_true(page_id)
    # Fetch the HTML content at the page url.
    page = requests.get(url[0])
    soup = BeautifulSoup(page.text, features='html.parser')
    # Keep only absolute https links; at most 10 are stored.
    links_list = [a['href'] for a in soup.find_all('a', href=True)
                  if a['href'].startswith('https')]
    # Consistency fix: use an instance (DB()) like the rest of this
    # function, instead of the original unbound DB.links() calls.
    DB().links().delete(page_id)  # remove links from any previous scrape
    for url in links_list[:10]:
        DB().links().insert(page_id, url)
    # Scrape finished: clear the in-progress flag.
    DB().pages().update_id_false(page_id)
def setUp(s):
    """Recreate a fresh database file before each test."""
    # Start from a clean slate: drop any db file left by a prior run.
    if os.path.isfile(dbPath):
        os.remove(dbPath)
    # If a previous test attached a DB instance, clear its tables first.
    if hasattr(s, 'db'):
        s.db.dropAll()
    database = DB({'db': dbPath})
    database.createDb()
    s.db = database
def __init__(self, bot_token, admin_id, engine_uri, oc_host, mtproto_proxy, base_dir, log_level='INFO'):
    """Wire up the Telegram updater, database handle and logging."""
    self.updater = Updater(bot_token, use_context=True)
    self.dispatcher = self.updater.dispatcher
    # Maps user_id -> callback awaiting that user's next input.
    self.input_dispatcher = {}
    self.db = DB(engine_uri)
    self.admin_id = admin_id
    self.oc_host = oc_host
    self.mtproto_proxy = mtproto_proxy
    self.base_dir = base_dir
    # Translate the textual level into the logging constant; an unknown
    # level raises KeyError, exactly like the original inline lookup.
    level_by_name = {
        'INFO': logging.INFO,
        'DEBUG': logging.DEBUG,
        'ERROR': logging.ERROR,
    }
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=level_by_name[log_level])
def scrape(start_index):
    # NOTE(review): Python 2 code (print statements). `start_index` is
    # accepted but never used -- TODO confirm whether it was meant to
    # resume a partially completed run.
    db = DB()
    nips = NipsETL(db)
    google = GoogleETL(db)
    arxiv = ArxivETL(db)
    titles = db.all('nips_papers')
    print "found %s nips_papers" % len(titles)
    # Seed the nips_papers table if it does not yet hold all papers.
    if len(titles) < NUM_NIPS_17_PAPERS:
        print "fetching..."
        response = nips.extract()
        titles = nips.transform(response)
        nips.load(titles)
    all_nips_papers_missing_abstracts = db.all_nips_papers_missing_abstracts()
    print "found %i nips papers missing abstracts" % len(
        all_nips_papers_missing_abstracts)
    # For each paper without an abstract: search Google, and if a result
    # points at an abstract page, pull the abstract from arXiv.
    for record in all_nips_papers_missing_abstracts:
        print "fetching #%d: %s" % (record['id'], record['title'])
        try:
            google_response = google.extract(record["title"])
        except RateLimitError:
            # Google started throttling us; stop and keep what we have.
            break
        search_result = google.transform(record['id'], google_response)
        google.load(search_result)
        if search_result["abstract_url"]:
            print "found search result!"
            arxiv_response = arxiv.extract(search_result["abstract_url"])
            abstract = arxiv.transform(arxiv_response)
            arxiv.load(record["id"], abstract)
    db.to_md("abstracts.md")
def test_column_reference():
    """A column name longer than 64 characters must survive a round trip."""
    long_column = 'column_which_has_a_really_long_name_longer_than_sixty_four_characters'
    db = DB('sqlite:///:memory:', main_tbl_name="test")
    db.add_record({'id': 1, long_column: 0})
    db.commit()
    first_row = list(db.retrieve_records())[0]
    assert long_column in first_row.keys()
def _dump_db(self, file_path: str):
    """Persist the current database to file_path via pickle.

    Snapshots the in-memory tables into a fresh DB object, registers it
    under the current database name, and pickles it to disk.

    Returns:
        0 on success (original convention).
    """
    new_db = DB(self.currentDB)
    new_db.tables = self.tables
    self.db[self.currentDB] = new_db
    # Context manager guarantees the handle is closed even if pickling
    # raises; the original leaked the open file on error.
    with open(file_path, 'wb') as f:
        pickle.dump(self.db[self.currentDB], f)
    return 0
def setUp(cls):
    """Build a fresh database seeded with one user and one test entry."""
    # Remove any database file left behind by an earlier run.
    if os.path.isfile(dbPath):
        os.remove(dbPath)
    # The game receives this DB instance through its constructor.
    database = DB({'db': dbPath})
    database.createDb()
    database.populateInfo('Gino')
    test_id = database.addTest('Gino', 'function ciao() { return "Ciao" }')
    database.updateUserInfo('Gino', {"selectedTest": test_id})
    cls.db = database
    cls.testId = test_id
def execute_create_db(self, d):
    """Handle CREATE DATABASE.

    Example::

        CREATE DATABASE testdb;
        d = {'name': 'testdb'}

    Registers a new DB under ``d['name']`` and returns 0.

    Raises:
        Exception: if a database with that name already exists. (Type
            kept as Exception for existing callers; the original raised
            it with an empty, useless message.)
    """
    name = d['name']
    if name not in self.db:  # idiomatic `not in` instead of `not x in y`
        self.db[name] = DB(name)
        return 0
    raise Exception('database {!r} already exists'.format(name))
def scrape(id):
    """Scrape up to 10 absolute links from the page with the given id.

    Marks the page as scraping, downloads its HTML, extracts the first
    10 hrefs that are absolute URLs, and replaces the page's stored
    links.

    NOTE(review): unlike the sibling implementations in this project,
    this version never resets is_scraping to 'False' afterwards -- TODO
    confirm whether that is intentional.
    """
    DB.pages().update('True', id)  # flag is_scraping = true
    url = DB().pages().fetch(id)
    page = requests.get(url[0])
    soup = BeautifulSoup(page.text, features='html.parser')
    a_soup = soup.find_all('a', href=True)
    # Bug fix: the original substring test (`"http" in href`) also
    # matched relative links that merely contained "http" somewhere;
    # startswith keeps only absolute URLs.
    ext_links = [a['href'] for a in a_soup if a['href'].startswith('http')]
    DB.links().delete(id)  # clear links from a previous scrape
    for link in ext_links[:10]:
        DB.links().insert(link, id)
def run(self, cmd, args, user):
    """Dispatch a chat command (help / login / apply) for a Slack user.

    NOTE(review): return shapes are inconsistent -- "help" and the
    fallback branch return a bare string while "login"/"apply" return a
    (message, blocks) tuple. Confirm what the caller expects.
    """
    slack_id = user
    # Per-user persistence keyed by the Slack id.
    db = DB(slack_id)
    print("\n")
    print(cmd)
    print("\n")
    print(args)
    if cmd == "help":
        ret = "\n".join((
            "Available commands:",
            "help: Prints the list of available commands",
            "login: User login, required before any other action",
            "apply: Apply for leave",
        ))
        return ret
    elif cmd == "login":
        print(args)
        # args is expected to be "<user_id> <password>" separated by a
        # single space.
        user_id, user_pass = args.split(' ')
        # print('setting user details')
        db.greythr_user_id = user_id
        db.greythr_password = user_pass
        db.freeze()
        return "Login successful!", None
    elif cmd == "apply":
        print('here')
        # args is expected to be "<start-day> <end-day>"; month and year
        # are hard-coded to Dec 2019.
        start, end = args.split(' ')
        start = f'{start} Dec 2019'
        end = f'{end} Dec 2019'
        print(start)
        print(end)
        print(db.greythr_user_id)
        print(db.greythr_password)
        # userid, passwd = 'T12546', '@123456789'
        # userid, passwd = 'S12667', 'Dynamic@@123'
        # login T12546 @123456789
        # apply ‘18 Dec 2019’ ‘19 Dec 2019’
        # res = asyncio.run(apply(db.greythr_user_id, db.greythr_password, start, end))
        # NOTE(review): the hard-coded credentials below ignore the
        # stored login -- a security risk and almost certainly a debug
        # leftover; confirm and restore the commented call above.
        res = asyncio.run(apply('T12546', '@123456789', start, end))
        return res, [{
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": "dsf"
            }
        }]
    else:
        ret = "Command not available!"
        return ret
def __init__(self):
    """Initialise resources, the database, timers and the main window."""
    # Resource lookup root for images/sounds.
    pyglet.resource.path = ['./res']
    pyglet.resource.reindex()
    self.db = DB('localhost', 3306, 'fisica', 'qwe123iop', 'fisica')
    # Primary screen dimensions, used below to centre the window.
    screen = pyglet.window.get_platform().get_default_display().get_screens()[0]
    self.MW = screen.width
    self.MH = screen.height
    pyglet.clock.schedule(self.timer)
    self.activateSuck()
    self.window = Frame(self, 400, 400, False, visible=False)
    centre_x = int((self.MW - self.window.width) / 2)
    centre_y = int((self.MH - self.window.height) / 2)
    self.window.set_location(centre_x, centre_y)
    self.window.setScene(AppLauncher())
    self.window.set_visible(True)
def main():
    """Fetch PTT posts, upsert them into the DB, then render charts."""
    db = DB()
    db.create()
    posts = GetPttPost(2)
    for post in posts:
        title = post['title']
        if db.get(title).fetchall():
            # Already stored: refresh the mutable fields.
            row_id = db.get_id(title).fetchall()[0][0]
            db.update(row_id, post['url'], post['author'],
                      post['date'], post['push'])
        else:
            # New post: strip board tags/prefixes before NLP analysis.
            cleaned = title.replace('Re: ', '').replace('[新聞] ', '')
            cleaned = cleaned.replace('[爆卦] ', '').replace('[問卦] ', '')
            cleaned = cleaned.replace('[協尋]', '')
            analysis = analyze.nlp(cleaned)
            db.store(title, post['url'], post['author'],
                     post['date'], post['push'], analysis)
    # Two separate get_all() calls, as in the original (the result may be
    # a one-shot cursor).
    chartHandler.handle_data(db.get_all())
    chartHandler.handle_push(db.get_all())
def spider(page_id):
    """Scrape the url registered under page_id and store up to 10 links.

    Looks up the page's url, flags the page as scraping, collects the
    absolute (http/https) hrefs from the fetched document, replaces the
    page's previously stored links with at most 10 of them, then clears
    the scraping flag.

    Raises:
        ValueError: if page_id is not a non-zero int, or no url exists
            for it. (Bug fix: the original *returned* a ValueError
            instance in the not-found case instead of raising it.)
    """
    if not isinstance(page_id, int) or page_id == 0:
        raise ValueError('Page Id is not valid')
    get_url = DB.pages().get_url(page_id)
    if get_url is None:
        # Fixed: was `return ValueError(...)`, which silently handed the
        # caller an exception object instead of signalling an error.
        raise ValueError('Page Id not found')
    url = get_url[0]
    # Mark the page as being scraped while we work.
    DB.pages().update_by_id(True, page_id)
    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')
    # Absolute links only (http/https prefixes).
    all_links = [a['href'] for a in soup.find_all('a', href=True)
                 if a['href'].startswith('http')]
    # Replace any links previously stored for this page.
    DB.links().delete_by_page_id(page_id)
    for link in all_links[:10]:
        Links(DB().connect()).insert(page_id, link)
    # Scrape complete: clear the flag.
    DB.pages().update_by_id(False, page_id)
def setUp(self) -> None:
    """Open the connection details used by each test."""
    connection = DB().connection_details()
    self.exec = connection
def test_persist(self):
    """Subscriptions must survive closing and reopening the database."""
    alice_feeds, bob_feeds, charlie_feeds = get_mock_feeds()
    self.subscribe_user_to_feeds('Alice', alice_feeds)
    # Reopen the DB to prove the data was persisted, not just cached.
    self.db.close()
    TestDB.db = DB()
    persisted = self.db.get_feeds_by_subscriber('Alice')
    self.assertItemsEqual(persisted, alice_feeds)
def setUp(self) -> None:
    """Create a fresh server connection for each test."""
    server_connection = DB().serv_conn()
    self.DB = server_connection
# Show examples of how you would use ALL your implementations here
from src.db import DB
from src.spider import spider_scrap
from celery import Celery
from decouple import config

# Bug fix: this instantiation was commented out in the original, so every
# `db.` call below raised NameError.
db = DB()
db.connect()
db.new_connect()
db.setup()
db.seed()

dd = DB.new_connect()

# Pages interface examples.
pages = DB.pages()
# pages.fetch_url(2)
print(pages.fetch_url(2))
print(pages.select())
print(pages.find(2))
# print(pages.update_id(1))

# Links interface examples.
links = DB.links()
print(links.insert(1, 'www.goggle.com'))
print(links.delete(1))
print(links.select(1))

# Celery wiring kept for reference:
# app = Celery('main', broker=config('CELERY_BROKER'), backend=config('CELERY_BACKEND'))
#
# @app.task
# def scrap_url():
#     return spider_scrap(1)

# spider_scrap(1)
def setUpClass(cls):
    """Create the shared DB handle once for the whole test class."""
    database = DB()
    cls.db = database
def __init__(self):
    """Connect to the modeling database, preferring localhost.

    Falls back to the 10.20.2.26 host when the local connection fails.
    """
    try:
        self.db = DB("mysql", "localhost", "root", "123456", "modeling")
    except Exception:
        # Bug fix: the original bare `except:` also swallowed
        # KeyboardInterrupt/SystemExit; narrowed to Exception.
        self.db = DB("mysql", "10.20.2.26", "root", "123456", "modeling")
def test_update_id_false(self):
    """update_id_false should clear the is_scraping flag for page 1."""
    DB().setup()
    DB().seed()
    expected = (1, 'https://www.facebook.com', False)
    updated_row = self.pages.update_id_false(1)
    self.assertEqual(updated_row[:3], expected)
def test_pages(self):
    """The pages interface must be reachable from a DB instance."""
    pages_interface = DB().pages()
    self.assertIsNotNone(pages_interface)
def setUp(self):
    """Give each test its own DB instance."""
    database = DB()
    self.db = database
def test_links(self):
    """The links interface must be reachable from a DB instance."""
    links_interface = DB().links()
    self.assertIsNotNone(links_interface)
def setUp(self) -> None:
    """Open the server connection used by the tests."""
    connection = DB().server_conn()
    self.exec = connection
def test_setup(self):
    """setup() should create the database tables and return None."""
    outcome = DB().setup()
    self.assertIsNone(outcome)
def test_connect(self):
    """connect() should hand back a usable (non-None) connection."""
    connection = DB().connect()
    self.assertIsNotNone(connection)
def test_insert(self):
    """insert() should add a link row for page 2 and return None."""
    DB().setup()
    # Consistency fix: sibling tests call seed() on an instance
    # (DB().seed()); the original `DB.seed()` invoked it unbound.
    DB().seed()
    self.assertEqual(self.links.insert(2, 'https://www.wikipedia.com'), None)
def test_seed(self):
    """seed() should populate the freshly created tables and return None."""
    seeded = DB().seed()
    self.assertIsNone(seeded)
# Flask entry point: wires the app, the MySQL-backed DB and a USER
# session helper, and declares the HTTP routes.
import flask
from flask import Flask
from flask import url_for
from mysql.connector import IntegrityError
from src.user import USER
from src.position import Position
from src import connection_info
from src.db import DB

app = Flask(__name__)
app.secret_key = b'G\xd3\x95iW9\x90\x93M\xf0Aa/XUU'  # key used to sign session cookies
db = DB(host=connection_info.DB_HOST, db_name=connection_info.DB_NAME, table_name=connection_info.TABLE_NAME, user_name=connection_info.DB_USER_NAME, user_pwd=connection_info.DB_USER_PASS_WORD)
user = USER()


@app.route('/', methods=['POST', 'GET'])
def index():
    """Serve the landing page."""
    return flask.render_template('index.html')


@app.route('/connection', methods=['POST', 'GET'])
def connection():
    """Handle the connection form (city, minimum distance, device uuid)."""
    city = flask.request.form['ville']
    min_distance = flask.request.form['min_distance']
    uuid = flask.request.form["uuid"]
    # NOTE(review): this handler appears to continue beyond this excerpt.